diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 09d9ce348f..20a8434150 100644 --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1709,7 +1709,7 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal "This many percentage of rows will be estimated as number of nulls in absence of statistics."), HIVESTATSAUTOGATHER("hive.stats.autogather", true, "A flag to gather statistics (only basic) automatically during the INSERT OVERWRITE command."), - HIVESTATSCOLAUTOGATHER("hive.stats.column.autogather", false, + HIVESTATSCOLAUTOGATHER("hive.stats.column.autogather", true, "A flag to gather column statistics automatically."), HIVESTATSDBCLASS("hive.stats.dbclass", "fs", new PatternSet("custom", "fs"), "The storage that stores temporary Hive statistics. In filesystem based statistics collection ('fs'), \n" + diff --git contrib/src/test/results/clientpositive/serde_typedbytes.q.out contrib/src/test/results/clientpositive/serde_typedbytes.q.out index c844a70627..c9d0fb6b63 100644 --- contrib/src/test/results/clientpositive/serde_typedbytes.q.out +++ contrib/src/test/results/clientpositive/serde_typedbytes.q.out @@ -72,6 +72,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -95,6 +121,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 Stage: Stage-3 Map Reduce diff --git contrib/src/test/results/clientpositive/serde_typedbytes2.q.out contrib/src/test/results/clientpositive/serde_typedbytes2.q.out index c0228aa4ff..715dc95f8b 100644 --- contrib/src/test/results/clientpositive/serde_typedbytes2.q.out +++ contrib/src/test/results/clientpositive/serde_typedbytes2.q.out @@ -68,6 +68,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: smallint), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic 
stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -91,6 +117,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: smallint, string + Table: default.dest1 Stage: Stage-3 Map Reduce diff --git contrib/src/test/results/clientpositive/serde_typedbytes3.q.out contrib/src/test/results/clientpositive/serde_typedbytes3.q.out index 0b7541ac13..b66d5043b9 100644 --- contrib/src/test/results/clientpositive/serde_typedbytes3.q.out +++ contrib/src/test/results/clientpositive/serde_typedbytes3.q.out @@ -68,6 +68,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -91,6 +117,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest1 Stage: Stage-3 Map Reduce diff --git contrib/src/test/results/clientpositive/serde_typedbytes4.q.out contrib/src/test/results/clientpositive/serde_typedbytes4.q.out index 981ff21bb3..45a209223a 100644 --- contrib/src/test/results/clientpositive/serde_typedbytes4.q.out +++ contrib/src/test/results/clientpositive/serde_typedbytes4.q.out @@ -77,6 +77,26 @@ STAGE PLANS: output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -91,6 +111,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest1 PREHOOK: query: FROM ( FROM src diff --git contrib/src/test/results/clientpositive/serde_typedbytes5.q.out contrib/src/test/results/clientpositive/serde_typedbytes5.q.out index 5a7df3c837..4beb35a243 100644 --- contrib/src/test/results/clientpositive/serde_typedbytes5.q.out +++ contrib/src/test/results/clientpositive/serde_typedbytes5.q.out @@ -72,6 +72,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -95,6 +121,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 Stage: Stage-3 Map Reduce diff --git data/conf/hive-site.xml data/conf/hive-site.xml index a205b8c569..05d6e8ac0e 100644 --- data/conf/hive-site.xml +++ data/conf/hive-site.xml @@ -302,12 +302,15 @@ true - hive.llap.io.allocator.direct false + + hive.stats.column.autogather + true + hive.materializedview.rewriting diff --git 
data/conf/spark/local/hive-site.xml data/conf/spark/local/hive-site.xml index 8bade0fa6e..fe822e50f5 100644 --- data/conf/spark/local/hive-site.xml +++ data/conf/spark/local/hive-site.xml @@ -255,4 +255,9 @@ Internal marker for test. Used for masking env-dependent values + + hive.stats.column.autogather + false + + diff --git data/conf/spark/standalone/hive-site.xml data/conf/spark/standalone/hive-site.xml index 90112e94d0..b54e51eb84 100644 --- data/conf/spark/standalone/hive-site.xml +++ data/conf/spark/standalone/hive-site.xml @@ -255,4 +255,9 @@ Internal marker for test. Used for masking env-dependent values + + hive.stats.column.autogather + false + + diff --git data/conf/spark/yarn-client/hive-site.xml data/conf/spark/yarn-client/hive-site.xml index bebca88bf1..b58a7a9627 100644 --- data/conf/spark/yarn-client/hive-site.xml +++ data/conf/spark/yarn-client/hive-site.xml @@ -295,4 +295,9 @@ 30000ms + + hive.stats.column.autogather + false + + diff --git hbase-handler/src/test/queries/positive/hbase_handler_snapshot.q hbase-handler/src/test/queries/positive/hbase_handler_snapshot.q index ebdc63ca47..dd4fe0f278 100644 --- hbase-handler/src/test/queries/positive/hbase_handler_snapshot.q +++ hbase-handler/src/test/queries/positive/hbase_handler_snapshot.q @@ -1,3 +1,4 @@ +set hive.stats.column.autogather=true; SET hive.hbase.snapshot.name=src_hbase_snapshot; SET hive.hbase.snapshot.restoredir=/tmp; diff --git itests/hive-blobstore/src/test/results/clientpositive/explain.q.out itests/hive-blobstore/src/test/results/clientpositive/explain.q.out index cae2a131b1..5d95dbd967 100644 --- itests/hive-blobstore/src/test/results/clientpositive/explain.q.out +++ itests/hive-blobstore/src/test/results/clientpositive/explain.q.out @@ -76,6 +76,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.blobstore_table + Select Operator + expressions: _col0 (type: int) + outputColumnNames: cnt + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(cnt, 'hll') + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -90,6 +110,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: cnt + Column Types: int + Table: default.blobstore_table PREHOOK: query: SELECT * FROM blobstore_table PREHOOK: type: QUERY diff --git itests/hive-blobstore/src/test/results/clientpositive/insert_into_dynamic_partitions.q.out itests/hive-blobstore/src/test/results/clientpositive/insert_into_dynamic_partitions.q.out index daf95c30af..bbd81d125f 100644 --- itests/hive-blobstore/src/test/results/clientpositive/insert_into_dynamic_partitions.q.out +++ itests/hive-blobstore/src/test/results/clientpositive/insert_into_dynamic_partitions.q.out @@ -211,6 +211,11 @@ STAGE PLANS: Stats Work Basic Stats Work: Stats 
Aggregation Key Prefix: ### BLOBSTORE_STAGING_PATH ### + Column Stats Desc: + Columns: id + Column Types: int + Table: default.table1 + Is Table Level Stats: false PREHOOK: query: DROP TABLE table1 PREHOOK: type: DROPTABLE diff --git itests/hive-blobstore/src/test/results/clientpositive/insert_into_table.q.out itests/hive-blobstore/src/test/results/clientpositive/insert_into_table.q.out index 5349210a78..315aedbf1a 100644 --- itests/hive-blobstore/src/test/results/clientpositive/insert_into_table.q.out +++ itests/hive-blobstore/src/test/results/clientpositive/insert_into_table.q.out @@ -73,7 +73,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}} bucket_count -1 column.name.delimiter , columns id @@ -95,6 +95,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int) + outputColumnNames: id + Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(id, 'hll') + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -134,6 +150,35 @@ STAGE PLANS: name: default.values__tmp__table__3 Truncated Path -> Alias: #### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0 + columns.types struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -147,7 +192,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}} bucket_count -1 column.name.delimiter , columns id @@ -171,6 +216,11 @@ STAGE PLANS: Stats Work Basic Stats Work: Stats Aggregation Key Prefix: ### BLOBSTORE_STAGING_PATH ### + Column Stats Desc: + Columns: id + Column Types: int + Table: default.table1 + Is Table Level Stats: true Stage: Stage-3 Map Reduce @@ -186,7 +236,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}} bucket_count -1 column.name.delimiter , columns id @@ -217,7 +267,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}} bucket_count -1 column.name.delimiter , columns id @@ -239,7 +289,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}} bucket_count -1 column.name.delimiter , columns id @@ -271,7 +321,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}} bucket_count -1 column.name.delimiter , columns id @@ -305,7 +355,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}} bucket_count -1 column.name.delimiter , columns id @@ -336,7 +386,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}} bucket_count -1 column.name.delimiter , columns id @@ -358,7 +408,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}} bucket_count -1 column.name.delimiter , columns id diff --git itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_directory.q.out itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_directory.q.out index 6e95fd123c..17db9dbd44 100644 --- itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_directory.q.out +++ itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_directory.q.out @@ -107,7 +107,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns id,key @@ -129,7 +129,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns id,key @@ -358,7 +358,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat 
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns id,key @@ -380,7 +380,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns id,key diff --git itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_dynamic_partitions.q.out itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_dynamic_partitions.q.out index 42b9821196..2192e15179 100644 --- itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_dynamic_partitions.q.out +++ itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_dynamic_partitions.q.out @@ -229,6 +229,11 @@ STAGE PLANS: Stats Work Basic Stats Work: Stats Aggregation Key Prefix: ### BLOBSTORE_STAGING_PATH ### + Column Stats Desc: + Columns: id + Column Types: int + Table: default.table1 + Is Table Level Stats: false PREHOOK: query: DROP TABLE table1 PREHOOK: type: DROPTABLE diff --git itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_table.q.out itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_table.q.out index cae1a5b518..10911a528b 100644 --- itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_table.q.out +++ itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_table.q.out @@ -81,7 +81,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}} bucket_count -1 column.name.delimiter , columns id @@ -103,6 +103,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int) + outputColumnNames: id + Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(id, 'hll') + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -142,6 +158,35 @@ STAGE PLANS: name: default.values__tmp__table__3 Truncated Path -> Alias: #### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0 + columns.types struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -155,7 +200,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}} bucket_count -1 column.name.delimiter , columns id @@ -179,6 +224,11 @@ STAGE PLANS: Stats Work Basic Stats Work: Stats Aggregation Key Prefix: ### BLOBSTORE_STAGING_PATH ### + Column Stats Desc: + Columns: id + Column Types: int + Table: default.table1 + Is Table Level Stats: true Stage: Stage-3 Map Reduce @@ -194,7 +244,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}} bucket_count -1 column.name.delimiter , columns id @@ -225,7 +275,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}} bucket_count -1 column.name.delimiter , columns id @@ -247,7 +297,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}} bucket_count -1 column.name.delimiter , columns id @@ -279,7 +329,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}} bucket_count -1 column.name.delimiter , columns id @@ -313,7 +363,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}} bucket_count -1 column.name.delimiter , columns id @@ -344,7 +394,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}} bucket_count -1 column.name.delimiter , columns id @@ -366,7 +416,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}} bucket_count -1 column.name.delimiter , columns id diff --git 
itests/hive-blobstore/src/test/results/clientpositive/write_final_output_blobstore.q.out itests/hive-blobstore/src/test/results/clientpositive/write_final_output_blobstore.q.out index 45e6d25b03..879e68a64a 100644 --- itests/hive-blobstore/src/test/results/clientpositive/write_final_output_blobstore.q.out +++ itests/hive-blobstore/src/test/results/clientpositive/write_final_output_blobstore.q.out @@ -214,6 +214,41 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll') + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 424 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0 + columns.types struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -248,6 +283,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key + Column Types: int + Table: default.blobstore_table + Is Table Level Stats: true PREHOOK: query: EXPLAIN EXTENDED FROM hdfs_table INSERT OVERWRITE TABLE blobstore_table SELECT hdfs_table.key GROUP BY hdfs_table.key ORDER BY hdfs_table.key PREHOOK: type: QUERY @@ -439,6 +479,41 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll') + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 424 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0 + columns.types struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false 
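The operator block added above — a Select Operator feeding a compute_stats(..., 'hll') Group By Operator and a sequence-file File Output Operator — is the column-statistics autogather branch the planner now attaches to qualifying file sinks (the SemanticAnalyzer hunk later in this patch excludes non-native tables). The Stats Work stage that follows picks this output up through its new Column Stats Desc and persists the values to the metastore. As a rough, illustrative sketch only (not part of the patch), using the hdfs_table/blobstore_table names from this test, the end-to-end effect is:

```sql
-- Illustrative only; table names taken from this test.
SET hive.stats.column.autogather=true;   -- the new default from this patch
INSERT OVERWRITE TABLE blobstore_table SELECT hdfs_table.key FROM hdfs_table;
-- Before this patch, the same column statistics required an explicit:
--   ANALYZE TABLE blobstore_table COMPUTE STATISTICS FOR COLUMNS;
DESCRIBE FORMATTED blobstore_table key;  -- column stats are now populated
```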
Stage: Stage-0 Move Operator @@ -473,6 +548,11 @@ STAGE PLANS: Stats Work Basic Stats Work: Stats Aggregation Key Prefix: ### BLOBSTORE_STAGING_PATH ### + Column Stats Desc: + Columns: key + Column Types: int + Table: default.blobstore_table + Is Table Level Stats: true PREHOOK: query: DROP TABLE hdfs_table PREHOOK: type: DROPTABLE diff --git pom.xml pom.xml index dfb29cee4b..26de2e0e9b 100644 --- pom.xml +++ pom.xml @@ -194,7 +194,7 @@ 1.7.10 4.0.4 3.0.0-SNAPSHOT - 0.9.1-SNAPSHOT + 0.9.0 0.92.0-incubating 2.2.0 2.0.0 diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 1de3dd7230..b66817f65f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -7151,6 +7151,7 @@ protected Operator genFileSinkPlan(String dest, QB qb, Operator input) // hive.stats.autogather=true // and it is an insert overwrite or insert into table if (dest_tab != null + && !dest_tab.isNonNative() && conf.getBoolVar(ConfVars.HIVESTATSAUTOGATHER) && conf.getBoolVar(ConfVars.HIVESTATSCOLAUTOGATHER) && ColumnStatsAutoGatherContext.canRunAutogatherStats(fso)) { diff --git ql/src/test/queries/clientpositive/alterColumnStatsPart.q ql/src/test/queries/clientpositive/alterColumnStatsPart.q index f46d9d5434..9bfc01f37e 100644 --- ql/src/test/queries/clientpositive/alterColumnStatsPart.q +++ ql/src/test/queries/clientpositive/alterColumnStatsPart.q @@ -1,3 +1,4 @@ +set hive.stats.column.autogather=false; set hive.mapred.mode=nonstrict; set hive.exec.dynamic.partition.mode=nonstrict; diff --git ql/src/test/queries/clientpositive/auto_sortmerge_join_13.q ql/src/test/queries/clientpositive/auto_sortmerge_join_13.q index 1c868dcd15..7b2b3e4cb8 100644 --- ql/src/test/queries/clientpositive/auto_sortmerge_join_13.q +++ ql/src/test/queries/clientpositive/auto_sortmerge_join_13.q @@ -1,5 +1,5 @@ +set hive.stats.column.autogather=false; set hive.explain.user=false; -; set hive.exec.reducers.max = 1; diff --git ql/src/test/queries/clientpositive/bucket_map_join_tez2.q ql/src/test/queries/clientpositive/bucket_map_join_tez2.q index b79d89d67a..2ce859f7c6 100644 --- ql/src/test/queries/clientpositive/bucket_map_join_tez2.q +++ ql/src/test/queries/clientpositive/bucket_map_join_tez2.q @@ -1,3 +1,4 @@ +set hive.stats.column.autogather=false; set hive.strict.checks.bucketing=false; set hive.mapred.mode=nonstrict; diff --git ql/src/test/queries/clientpositive/bucket_num_reducers.q ql/src/test/queries/clientpositive/bucket_num_reducers.q index 06f334e833..5c5008eea7 100644 --- ql/src/test/queries/clientpositive/bucket_num_reducers.q +++ ql/src/test/queries/clientpositive/bucket_num_reducers.q @@ -1,4 +1,4 @@ -; +set hive.stats.column.autogather=false; set hive.exec.mode.local.auto=false; set mapred.reduce.tasks = 10; diff --git ql/src/test/queries/clientpositive/columnStatsUpdateForStatsOptimizer_1.q ql/src/test/queries/clientpositive/columnStatsUpdateForStatsOptimizer_1.q index 9cd9150211..a11e7f0445 100644 --- ql/src/test/queries/clientpositive/columnStatsUpdateForStatsOptimizer_1.q +++ ql/src/test/queries/clientpositive/columnStatsUpdateForStatsOptimizer_1.q @@ -1,3 +1,4 @@ +set hive.stats.column.autogather=false; set hive.stats.fetch.column.stats=true; set hive.compute.query.using.stats=true; set hive.mapred.mode=nonstrict; diff --git ql/src/test/queries/clientpositive/columnStatsUpdateForStatsOptimizer_2.q 
ql/src/test/queries/clientpositive/columnStatsUpdateForStatsOptimizer_2.q index 5fa7aec900..2ae85e2bd6 100644 --- ql/src/test/queries/clientpositive/columnStatsUpdateForStatsOptimizer_2.q +++ ql/src/test/queries/clientpositive/columnStatsUpdateForStatsOptimizer_2.q @@ -1,3 +1,4 @@ +set hive.stats.column.autogather=false; set hive.stats.fetch.column.stats=true; set hive.compute.query.using.stats=true; diff --git ql/src/test/queries/clientpositive/combine1.q ql/src/test/queries/clientpositive/combine1.q index 3bcb8b19c1..b300830884 100644 --- ql/src/test/queries/clientpositive/combine1.q +++ ql/src/test/queries/clientpositive/combine1.q @@ -7,6 +7,8 @@ set mapred.max.split.size=256; set mapred.output.compression.codec=org.apache.hadoop.io.compress.GzipCodec; +set hive.stats.column.autogather=false; + -- SORT_QUERY_RESULTS create table combine1_1(key string, value string) stored as textfile; diff --git ql/src/test/queries/clientpositive/correlationoptimizer5.q ql/src/test/queries/clientpositive/correlationoptimizer5.q index 45b8cb955d..002fb12e22 100644 --- ql/src/test/queries/clientpositive/correlationoptimizer5.q +++ ql/src/test/queries/clientpositive/correlationoptimizer5.q @@ -1,3 +1,5 @@ +set hive.stats.column.autogather=false; +-- Currently, queries with multiple FileSinkOperators are not supported. set hive.mapred.mode=nonstrict; CREATE TABLE T1(key INT, val STRING); LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' INTO TABLE T1; diff --git ql/src/test/queries/clientpositive/encryption_insert_values.q ql/src/test/queries/clientpositive/encryption_insert_values.q index 2dd3e9ad1d..c8d1d519f3 100644 --- ql/src/test/queries/clientpositive/encryption_insert_values.q +++ ql/src/test/queries/clientpositive/encryption_insert_values.q @@ -1,4 +1,5 @@ -- SORT_QUERY_RESULTS; +set hive.stats.column.autogather=false; DROP TABLE IF EXISTS encrypted_table PURGE; CREATE TABLE encrypted_table (key INT, value STRING) LOCATION '${hiveconf:hive.metastore.warehouse.dir}/default/encrypted_table'; @@ -12,4 +13,4 @@ select * from encrypted_table; -- this checks that we've actually created temp table data under encrypted_table folder describe formatted values__tmp__table__1; -CRYPTO DELETE_KEY --keyName key_128; \ No newline at end of file +CRYPTO DELETE_KEY --keyName key_128; diff --git ql/src/test/queries/clientpositive/encryption_join_with_different_encryption_keys.q ql/src/test/queries/clientpositive/encryption_join_with_different_encryption_keys.q index 4dcea1f7ce..7159ad5995 100644 --- ql/src/test/queries/clientpositive/encryption_join_with_different_encryption_keys.q +++ ql/src/test/queries/clientpositive/encryption_join_with_different_encryption_keys.q @@ -1,3 +1,4 @@ +set hive.stats.column.autogather=false; --SORT_QUERY_RESULTS -- Java JCE must be installed in order to hava a key length of 256 bits diff --git ql/src/test/queries/clientpositive/encryption_move_tbl.q ql/src/test/queries/clientpositive/encryption_move_tbl.q index 0b7771cc4a..8d865aa6e8 100644 --- ql/src/test/queries/clientpositive/encryption_move_tbl.q +++ ql/src/test/queries/clientpositive/encryption_move_tbl.q @@ -1,4 +1,5 @@ -- SORT_QUERY_RESULTS; +set hive.stats.column.autogather=false; -- we're setting this so that TestNegaiveCliDriver.vm doesn't stop processing after ALTER TABLE fails; diff --git ql/src/test/queries/clientpositive/groupby1.q ql/src/test/queries/clientpositive/groupby1.q index a8c9a8dcf8..cd3a12b44e 100755 --- ql/src/test/queries/clientpositive/groupby1.q +++ ql/src/test/queries/clientpositive/groupby1.q @@ -1,3
+1,5 @@ +-- due to testMTQueries1 +set hive.stats.column.autogather=false; set hive.mapred.mode=nonstrict; set hive.explain.user=false; set hive.map.aggr=false; diff --git ql/src/test/queries/clientpositive/groupby1_limit.q ql/src/test/queries/clientpositive/groupby1_limit.q index b8e389e511..6c40e19540 100644 --- ql/src/test/queries/clientpositive/groupby1_limit.q +++ ql/src/test/queries/clientpositive/groupby1_limit.q @@ -1,3 +1,4 @@ +set hive.stats.column.autogather=false; set hive.mapred.mode=nonstrict; set mapred.reduce.tasks=31; diff --git ql/src/test/queries/clientpositive/groupby_multi_single_reducer.q ql/src/test/queries/clientpositive/groupby_multi_single_reducer.q index 2b799f87eb..40976ee707 100644 --- ql/src/test/queries/clientpositive/groupby_multi_single_reducer.q +++ ql/src/test/queries/clientpositive/groupby_multi_single_reducer.q @@ -1,3 +1,6 @@ +set hive.stats.column.autogather=false; +-- due to L137 in LimitPushDownOptimization: it is not safe to continue for RS-GBY-GBY-LIM pipelines. See HIVE-10607 for more. + set hive.multigroupby.singlereducer=true; -- SORT_QUERY_RESULTS diff --git ql/src/test/queries/clientpositive/hybridgrace_hashjoin_1.q ql/src/test/queries/clientpositive/hybridgrace_hashjoin_1.q index e404dd0f54..8955b6a651 100644 --- ql/src/test/queries/clientpositive/hybridgrace_hashjoin_1.q +++ ql/src/test/queries/clientpositive/hybridgrace_hashjoin_1.q @@ -1,3 +1,4 @@ +set hive.stats.column.autogather=false; set hive.mapred.mode=nonstrict; set hive.explain.user=false; set tez.cartesian-product.max-parallelism=1; diff --git ql/src/test/queries/clientpositive/infer_bucket_sort_convert_join.q ql/src/test/queries/clientpositive/infer_bucket_sort_convert_join.q index 6809b721be..e4170283f3 100644 --- ql/src/test/queries/clientpositive/infer_bucket_sort_convert_join.q +++ ql/src/test/queries/clientpositive/infer_bucket_sort_convert_join.q @@ -1,3 +1,12 @@ +set hive.stats.column.autogather=false; +-- This behavior looks odd: +-- on master, with autogather=true, hive.mapjoin.localtask.max.memory.usage becomes 0.55 because there is a group-by +-- (L132 of LocalMapJoinProcFactory). +-- When run from the CLI, hive.exec.submit.local.task.via.child is true and the error can be seen; +-- it can also be seen if hive.exec.submit.local.task.via.child is set to false. +-- With this patch the tasks are simply merged; hive.exec.submit.local.task.via.child is false here because of the pom.xml setting, +-- however, the test still fails even after changing it to true.
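Each q-file in this group pins hive.stats.column.autogather=false so its golden output stays stable under the new default. When the flag is left at true, the extra branch the planner injects amounts to running the compute_stats UDAF with the 'hll' estimator over the inserted rows. A minimal hypothetical sketch follows; the table and data are invented for illustration, mirroring the CREATE TABLE T1(key INT, val STRING) used elsewhere in this patch:

```sql
-- Hypothetical example; not part of the patch.
SET hive.stats.column.autogather=false;   -- opt out, as these q-files do
CREATE TABLE T1 (key INT, val STRING);
INSERT INTO T1 VALUES (1, 'a'), (2, 'b');
-- With autogather enabled, the planner would compute, per column,
-- an aggregate equivalent to:
SELECT compute_stats(key, 'hll'), compute_stats(val, 'hll') FROM T1;
```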
+ set hive.mapred.mode=nonstrict; set hive.exec.infer.bucket.sort=true; set hive.exec.infer.bucket.sort.num.buckets.power.two=true; diff --git ql/src/test/queries/clientpositive/infer_bucket_sort_reducers_power_two.q ql/src/test/queries/clientpositive/infer_bucket_sort_reducers_power_two.q index 6824c1c032..c0ddb8bce6 100644 --- ql/src/test/queries/clientpositive/infer_bucket_sort_reducers_power_two.q +++ ql/src/test/queries/clientpositive/infer_bucket_sort_reducers_power_two.q @@ -1,3 +1,4 @@ +set hive.stats.column.autogather=false; set hive.mapred.mode=nonstrict; set hive.exec.infer.bucket.sort=true; set hive.exec.infer.bucket.sort.num.buckets.power.two=true; diff --git ql/src/test/queries/clientpositive/input11_limit.q ql/src/test/queries/clientpositive/input11_limit.q index 052a72ee68..211c37adc5 100644 --- ql/src/test/queries/clientpositive/input11_limit.q +++ ql/src/test/queries/clientpositive/input11_limit.q @@ -1,3 +1,4 @@ +set hive.stats.column.autogather=false; set hive.mapred.mode=nonstrict; -- SORT_QUERY_RESULTS diff --git ql/src/test/queries/clientpositive/input14_limit.q ql/src/test/queries/clientpositive/input14_limit.q index 7316752a6d..2f6e4e47c9 100644 --- ql/src/test/queries/clientpositive/input14_limit.q +++ ql/src/test/queries/clientpositive/input14_limit.q @@ -1,3 +1,5 @@ +set hive.stats.column.autogather=false; + CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE; EXPLAIN diff --git ql/src/test/queries/clientpositive/join2.q ql/src/test/queries/clientpositive/join2.q index 8aedd561e2..c3c7c241e9 100644 --- ql/src/test/queries/clientpositive/join2.q +++ ql/src/test/queries/clientpositive/join2.q @@ -1,3 +1,5 @@ +-- due to testMTQueries1 +set hive.stats.column.autogather=false; set hive.mapred.mode=nonstrict; -- SORT_QUERY_RESULTS diff --git ql/src/test/queries/clientpositive/metadata_only_queries.q ql/src/test/queries/clientpositive/metadata_only_queries.q index 8581a46b2d..bcf320b0c5 100644 --- ql/src/test/queries/clientpositive/metadata_only_queries.q +++ ql/src/test/queries/clientpositive/metadata_only_queries.q @@ -1,3 +1,4 @@ +set hive.stats.column.autogather=false; set hive.mapred.mode=nonstrict; set hive.explain.user=false; set hive.compute.query.using.stats=true; diff --git ql/src/test/queries/clientpositive/metadata_only_queries_with_filters.q ql/src/test/queries/clientpositive/metadata_only_queries_with_filters.q index 1af813e3ed..692c414354 100644 --- ql/src/test/queries/clientpositive/metadata_only_queries_with_filters.q +++ ql/src/test/queries/clientpositive/metadata_only_queries_with_filters.q @@ -1,3 +1,4 @@ +set hive.stats.column.autogather=false; set hive.stats.dbclass=fs; set hive.compute.query.using.stats=true; set hive.explain.user=false; diff --git ql/src/test/queries/clientpositive/multiMapJoin1.q ql/src/test/queries/clientpositive/multiMapJoin1.q index 5c49b4c64f..6e16af4617 100644 --- ql/src/test/queries/clientpositive/multiMapJoin1.q +++ ql/src/test/queries/clientpositive/multiMapJoin1.q @@ -1,3 +1,4 @@ +set hive.stats.column.autogather=false; set hive.mapred.mode=nonstrict; set hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.PostExecutePrinter,org.apache.hadoop.hive.ql.hooks.PrintCompletedTasksHook; diff --git ql/src/test/queries/clientpositive/orc_wide_table.q ql/src/test/queries/clientpositive/orc_wide_table.q index 422a3c24b1..d2ec3857d0 100644 --- ql/src/test/queries/clientpositive/orc_wide_table.q +++ ql/src/test/queries/clientpositive/orc_wide_table.q @@ -1,4 +1,5 @@ set hive.mapred.mode=nonstrict; +set 
hive.stats.column.autogather=false; drop table if exists test_txt; drop table if exists test_orc; create table test_txt( diff --git ql/src/test/queries/clientpositive/partition_coltype_literals.q ql/src/test/queries/clientpositive/partition_coltype_literals.q index eb56b1a93d..8da4876b70 100644 --- ql/src/test/queries/clientpositive/partition_coltype_literals.q +++ ql/src/test/queries/clientpositive/partition_coltype_literals.q @@ -1,3 +1,4 @@ +set hive.stats.column.autogather=false; set hive.compute.query.using.stats=false; drop table if exists partcoltypenum; create table partcoltypenum (key int, value string) partitioned by (tint tinyint, sint smallint, bint bigint); diff --git ql/src/test/queries/clientpositive/row__id.q ql/src/test/queries/clientpositive/row__id.q index d9cb7b0ff6..6aaa40f68f 100644 --- ql/src/test/queries/clientpositive/row__id.q +++ ql/src/test/queries/clientpositive/row__id.q @@ -1,3 +1,5 @@ +-- tid is flaky when computing column stats +set hive.stats.column.autogather=false; set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; drop table if exists hello_acid; diff --git ql/src/test/queries/clientpositive/smb_join_partition_key.q ql/src/test/queries/clientpositive/smb_join_partition_key.q index 160bf5e36a..23027f8aa5 100644 --- ql/src/test/queries/clientpositive/smb_join_partition_key.q +++ ql/src/test/queries/clientpositive/smb_join_partition_key.q @@ -1,3 +1,5 @@ +-- Because p1 is decimal, when Derby retrieves a partition with a decimal value it uses partval = 100.0 rather than 100. As a result, the partition is not found and an exception is thrown. +set hive.stats.column.autogather=false; set hive.mapred.mode=nonstrict; SET hive.enforce.sortmergebucketmapjoin=false; SET hive.auto.convert.sortmerge.join=true; diff --git ql/src/test/queries/clientpositive/stats_only_null.q ql/src/test/queries/clientpositive/stats_only_null.q index 7fd22c5d5d..fa83c6fb53 100644 --- ql/src/test/queries/clientpositive/stats_only_null.q +++ ql/src/test/queries/clientpositive/stats_only_null.q @@ -1,3 +1,4 @@ +set hive.stats.column.autogather=false; set hive.mapred.mode=nonstrict; set hive.explain.user=false; set hive.stats.dbclass=fs; diff --git ql/src/test/queries/clientpositive/tez_smb_1.q ql/src/test/queries/clientpositive/tez_smb_1.q index 4e8d1c03fa..1ae35ef4bc 100644 --- ql/src/test/queries/clientpositive/tez_smb_1.q +++ ql/src/test/queries/clientpositive/tez_smb_1.q @@ -1,3 +1,4 @@ +set hive.stats.column.autogather=false; set hive.strict.checks.bucketing=false; set hive.mapred.mode=nonstrict; diff --git ql/src/test/queries/clientpositive/tez_smb_main.q ql/src/test/queries/clientpositive/tez_smb_main.q index e4ab75a9be..6bace67d74 100644 --- ql/src/test/queries/clientpositive/tez_smb_main.q +++ ql/src/test/queries/clientpositive/tez_smb_main.q @@ -1,3 +1,4 @@ +set hive.stats.column.autogather=false; set hive.strict.checks.bucketing=false; set hive.mapred.mode=nonstrict; diff --git ql/src/test/queries/clientpositive/udf_round_2.q ql/src/test/queries/clientpositive/udf_round_2.q index 43988c1225..38885a97d4 100644 --- ql/src/test/queries/clientpositive/udf_round_2.q +++ ql/src/test/queries/clientpositive/udf_round_2.q @@ -1,4 +1,5 @@ set hive.fetch.task.conversion=more; +set hive.stats.column.autogather=false; -- test for NaN (not-a-number) create table tstTbl1(n double); diff --git ql/src/test/queries/clientpositive/union_remove_26.q ql/src/test/queries/clientpositive/union_remove_26.q index 4e77d730ff..92ccbc3a52 100644 ---
ql/src/test/queries/clientpositive/union_remove_26.q +++ ql/src/test/queries/clientpositive/union_remove_26.q @@ -1,3 +1,4 @@ +set hive.stats.column.autogather=false; set hive.stats.autogather=true; -- This is to test the union remove optimization with stats optimization diff --git ql/src/test/queries/clientpositive/vector_auto_smb_mapjoin_14.q ql/src/test/queries/clientpositive/vector_auto_smb_mapjoin_14.q index 3c53853912..81d6541674 100644 --- ql/src/test/queries/clientpositive/vector_auto_smb_mapjoin_14.q +++ ql/src/test/queries/clientpositive/vector_auto_smb_mapjoin_14.q @@ -1,3 +1,4 @@ +set hive.stats.column.autogather=false; set hive.mapred.mode=nonstrict; set hive.explain.user=false; SET hive.vectorized.execution.enabled=true; diff --git ql/src/test/queries/clientpositive/vector_bucket.q ql/src/test/queries/clientpositive/vector_bucket.q index b67592ef01..25d3b5657b 100644 --- ql/src/test/queries/clientpositive/vector_bucket.q +++ ql/src/test/queries/clientpositive/vector_bucket.q @@ -1,3 +1,4 @@ +set hive.stats.column.autogather=false; set hive.mapred.mode=nonstrict; set hive.explain.user=false; SET hive.vectorized.execution.enabled=true; diff --git ql/src/test/queries/clientpositive/vector_char_4.q ql/src/test/queries/clientpositive/vector_char_4.q index edc59cda7d..df72ac1e50 100644 --- ql/src/test/queries/clientpositive/vector_char_4.q +++ ql/src/test/queries/clientpositive/vector_char_4.q @@ -1,3 +1,4 @@ +set hive.stats.column.autogather=false; set hive.explain.user=false; SET hive.vectorized.execution.enabled=true; set hive.fetch.task.conversion=none; @@ -50,4 +51,4 @@ insert overwrite table char_lazy_binary_columnar select t, si, i, b, f, d, s fro -- insert overwrite table char_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc; --- select count(*) as cnt from char_lazy_binary_columnar group by cs order by cnt asc; \ No newline at end of file +-- select count(*) as cnt from char_lazy_binary_columnar group by cs order by cnt asc; diff --git ql/src/test/queries/clientpositive/vector_char_simple.q ql/src/test/queries/clientpositive/vector_char_simple.q index c315241908..70b0900852 100644 --- ql/src/test/queries/clientpositive/vector_char_simple.q +++ ql/src/test/queries/clientpositive/vector_char_simple.q @@ -1,3 +1,4 @@ +set hive.stats.column.autogather=false; set hive.explain.user=false; SET hive.vectorized.execution.enabled=true; set hive.fetch.task.conversion=none; diff --git ql/src/test/queries/clientpositive/vector_groupby_rollup1.q ql/src/test/queries/clientpositive/vector_groupby_rollup1.q index 17858ff7ec..39bc2c1283 100644 --- ql/src/test/queries/clientpositive/vector_groupby_rollup1.q +++ ql/src/test/queries/clientpositive/vector_groupby_rollup1.q @@ -1,3 +1,4 @@ +set hive.stats.column.autogather=false; set hive.explain.user=false; SET hive.vectorized.execution.enabled=true; SET hive.vectorized.execution.reduce.enabled=true; diff --git ql/src/test/queries/clientpositive/vector_multi_insert.q ql/src/test/queries/clientpositive/vector_multi_insert.q index c56ee1c4aa..e6bfb96794 100644 --- ql/src/test/queries/clientpositive/vector_multi_insert.q +++ ql/src/test/queries/clientpositive/vector_multi_insert.q @@ -1,3 +1,4 @@ +set hive.stats.column.autogather=false; set hive.explain.user=false; SET hive.vectorized.execution.enabled=true; set hive.fetch.task.conversion=none; diff --git ql/src/test/queries/clientpositive/vector_tablesample_rows.q ql/src/test/queries/clientpositive/vector_tablesample_rows.q index bb3c5a43f6..e748e9084b 100644 --- 
ql/src/test/queries/clientpositive/vector_tablesample_rows.q +++ ql/src/test/queries/clientpositive/vector_tablesample_rows.q @@ -1,3 +1,4 @@ +set hive.stats.column.autogather=false; set hive.cli.print.header=true; set hive.explain.user=false; SET hive.vectorized.execution.enabled=true; @@ -35,4 +36,4 @@ create temporary table dual as select 1; create temporary table dual as select 1; -select * from dual; \ No newline at end of file +select * from dual; diff --git ql/src/test/queries/clientpositive/vector_udf_character_length.q ql/src/test/queries/clientpositive/vector_udf_character_length.q index 19a5260ddc..e49a091b34 100644 --- ql/src/test/queries/clientpositive/vector_udf_character_length.q +++ ql/src/test/queries/clientpositive/vector_udf_character_length.q @@ -1,3 +1,4 @@ +set hive.stats.column.autogather=false; SET hive.vectorized.execution.enabled=true; set hive.fetch.task.conversion=none; diff --git ql/src/test/queries/clientpositive/vector_udf_octet_length.q ql/src/test/queries/clientpositive/vector_udf_octet_length.q index 06a49852a2..af4c7c4a7f 100644 --- ql/src/test/queries/clientpositive/vector_udf_octet_length.q +++ ql/src/test/queries/clientpositive/vector_udf_octet_length.q @@ -1,3 +1,4 @@ +set hive.stats.column.autogather=false; SET hive.vectorized.execution.enabled=true; set hive.fetch.task.conversion=none; diff --git ql/src/test/queries/clientpositive/vector_varchar_4.q ql/src/test/queries/clientpositive/vector_varchar_4.q index 80f84d8b9f..b3402d0df2 100644 --- ql/src/test/queries/clientpositive/vector_varchar_4.q +++ ql/src/test/queries/clientpositive/vector_varchar_4.q @@ -1,3 +1,4 @@ +set hive.stats.column.autogather=false; set hive.explain.user=false; SET hive.vectorized.execution.enabled=true; set hive.fetch.task.conversion=none; @@ -50,4 +51,4 @@ insert overwrite table varchar_lazy_binary_columnar select t, si, i, b, f, d, s -- insert overwrite table varchar_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc; --- select count(*) as cnt from varchar_lazy_binary_columnar group by vs order by cnt asc; \ No newline at end of file +-- select count(*) as cnt from varchar_lazy_binary_columnar group by vs order by cnt asc; diff --git ql/src/test/queries/clientpositive/vector_varchar_simple.q ql/src/test/queries/clientpositive/vector_varchar_simple.q index 6f753a748d..352ec3aebc 100644 --- ql/src/test/queries/clientpositive/vector_varchar_simple.q +++ ql/src/test/queries/clientpositive/vector_varchar_simple.q @@ -1,3 +1,4 @@ +set hive.stats.column.autogather=false; set hive.explain.user=false; SET hive.vectorized.execution.enabled=true; set hive.fetch.task.conversion=none; diff --git ql/src/test/queries/clientpositive/vector_windowing_expressions.q ql/src/test/queries/clientpositive/vector_windowing_expressions.q index 6a37c4ed57..3a72900231 100644 --- ql/src/test/queries/clientpositive/vector_windowing_expressions.q +++ ql/src/test/queries/clientpositive/vector_windowing_expressions.q @@ -1,3 +1,4 @@ +set hive.stats.column.autogather=false; set hive.cli.print.header=true; SET hive.vectorized.execution.enabled=true; SET hive.vectorized.execution.reduce.enabled=true; diff --git ql/src/test/results/clientpositive/acid_table_stats.q.out ql/src/test/results/clientpositive/acid_table_stats.q.out index d0fbcac5b0..d7776c8852 100644 --- ql/src/test/results/clientpositive/acid_table_stats.q.out +++ ql/src/test/results/clientpositive/acid_table_stats.q.out @@ -91,7 +91,6 @@ Database: default Table: acid #### A masked pattern was here #### Partition Parameters: - 
COLUMN_STATS_ACCURATE {} numFiles 2 numRows 0 rawDataSize 0 @@ -383,7 +382,6 @@ Database: default Table: acid #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {} numFiles 4 numRows 1000 rawDataSize 208000 @@ -662,7 +660,7 @@ Database: default Table: acid #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 1000 rawDataSize 176000 diff --git ql/src/test/results/clientpositive/alterColumnStats.q.out ql/src/test/results/clientpositive/alterColumnStats.q.out index 09f2fe1008..6136392368 100644 --- ql/src/test/results/clientpositive/alterColumnStats.q.out +++ ql/src/test/results/clientpositive/alterColumnStats.q.out @@ -72,7 +72,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"c1\":\"true\",\"c2\":\"true\",\"insert_num\":\"true\"}} numFiles 1 numRows 1 rawDataSize 8 @@ -115,7 +115,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"insert_num\":\"true\"}} #### A masked pattern was here #### numFiles 1 numRows 1 @@ -151,7 +151,7 @@ num_trues num_falses bitVector comment from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"insert_num\":\"true\"}} PREHOOK: query: desc formatted p c2 PREHOOK: type: DESCTABLE PREHOOK: Input: default@p @@ -170,4 +170,4 @@ num_trues num_falses bitVector comment from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"insert_num\":\"true\"}} diff --git ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table2_h23.q.out ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table2_h23.q.out index 3479ac31a0..2d6aca5f7a 100644 --- ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table2_h23.q.out +++ ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table2_h23.q.out @@ -75,7 +75,7 @@ Database: default Table: tst1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -170,7 +170,7 @@ Database: default Table: tst1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 8 numRows 500 rawDataSize 5312 @@ -265,7 +265,7 @@ Database: default Table: tst1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 8 numRows 500 rawDataSize 5312 @@ -360,7 +360,7 @@ Database: default Table: tst1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} 
diff --git ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table2_h23.q.out ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table2_h23.q.out
index 3479ac31a0..2d6aca5f7a 100644
--- ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table2_h23.q.out
+++ ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table2_h23.q.out
@@ -75,7 +75,7 @@ Database: default
 Table: tst1
 #### A masked pattern was here ####
 Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
 numFiles 1
 numRows 500
 rawDataSize 5312
@@ -170,7 +170,7 @@ Database: default
 Table: tst1
 #### A masked pattern was here ####
 Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
 numFiles 8
 numRows 500
 rawDataSize 5312
@@ -265,7 +265,7 @@ Database: default
 Table: tst1
 #### A masked pattern was here ####
 Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
 numFiles 8
 numRows 500
 rawDataSize 5312
@@ -360,7 +360,7 @@ Database: default
 Table: tst1
 #### A masked pattern was here ####
 Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
 numFiles 8
 numRows 500
 rawDataSize 5312
@@ -455,7 +455,7 @@ Database: default
 Table: tst1
 #### A masked pattern was here ####
 Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
 numFiles 4
 numRows 500
 rawDataSize 5312
@@ -550,7 +550,7 @@ Database: default
 Table: tst1
 #### A masked pattern was here ####
 Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
 numFiles 4
 numRows 500
 rawDataSize 5312
@@ -645,7 +645,7 @@ Database: default
 Table: tst1
 #### A masked pattern was here ####
 Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
 numFiles 4
 numRows 500
 rawDataSize 5312
@@ -740,7 +740,7 @@ Database: default
 Table: tst1
 #### A masked pattern was here ####
 Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
 numFiles 4
 numRows 500
 rawDataSize 5312
@@ -835,7 +835,7 @@ Database: default
 Table: tst1
 #### A masked pattern was here ####
 Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
 numFiles 1
 numRows 500
 rawDataSize 5312
diff --git ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table_h23.q.out ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table_h23.q.out
index 537e776cc3..60d3eba81b 100644
--- ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table_h23.q.out
+++ ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table_h23.q.out
@@ -84,7 +84,7 @@ Database: default
 Table: tst1
 #### A masked pattern was here ####
 Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
 numFiles 8
 numRows 500
 rawDataSize 5312
@@ -139,7 +139,7 @@ Database: default
 Table: tst1
 #### A masked pattern was here ####
 Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
 numFiles 12
 numRows 500
 rawDataSize 5312
@@ -263,7 +263,7 @@ Database: default
 Table: tst1
 #### A masked pattern was here ####
 Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
 numFiles 12
 numRows 500
 rawDataSize 5312
@@ -348,7 +348,7 @@ Database: default
 Table: tst1
 #### A masked pattern was here ####
 Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
 #### A masked pattern was here ####
 numFiles 12
 numRows 500
diff --git ql/src/test/results/clientpositive/alter_partition_coltype.q.out ql/src/test/results/clientpositive/alter_partition_coltype.q.out
index 72322d98f3..361b196fbd 100644
--- ql/src/test/results/clientpositive/alter_partition_coltype.q.out
+++ ql/src/test/results/clientpositive/alter_partition_coltype.q.out
@@ -231,7 +231,7 @@ STAGE PLANS:
 dt 100
 ts 3.0
 properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
 bucket_count -1
 column.name.delimiter ,
 columns key,value
@@ -277,7 +277,7 @@ STAGE PLANS:
 dt 100
 ts 6.30
 properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
 bucket_count -1
 column.name.delimiter ,
 columns key,value
@@ -465,7 +465,7 @@ STAGE PLANS:
 partcol1 1
 partcol2 1
 properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"intcol":"true"}}
 bucket_count -1
 column.name.delimiter ,
 columns intcol
@@ -534,7 +534,7 @@ STAGE PLANS:
 partcol1 2
 partcol2 1
 properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"intcol":"true"}}
 bucket_count -1
 column.name.delimiter ,
 columns intcol
diff --git ql/src/test/results/clientpositive/alter_table_add_partition.q.out ql/src/test/results/clientpositive/alter_table_add_partition.q.out
index ca941dd860..a8b4d5b60b 100644
--- ql/src/test/results/clientpositive/alter_table_add_partition.q.out
+++ ql/src/test/results/clientpositive/alter_table_add_partition.q.out
@@ -189,7 +189,7 @@ Database: default
 Table: mp
 #### A masked pattern was here ####
 Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\"}}
 numFiles 1
 numRows 1
 rawDataSize 1
diff --git ql/src/test/results/clientpositive/alter_table_serde2.q.out ql/src/test/results/clientpositive/alter_table_serde2.q.out
index 1c798b04e8..81b1f5b28b 100644
--- ql/src/test/results/clientpositive/alter_table_serde2.q.out
+++ ql/src/test/results/clientpositive/alter_table_serde2.q.out
@@ -75,7 +75,7 @@ Database: default
 Table: tst1
 #### A masked pattern was here ####
 Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
 numFiles 1
 numRows 500
 rawDataSize 5312
@@ -171,7 +171,7 @@ Database: default
 Table: tst1
 #### A masked pattern was here ####
 Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
 numFiles 1
 numRows 500
 rawDataSize 5312
diff --git ql/src/test/results/clientpositive/analyze_table_null_partition.q.out ql/src/test/results/clientpositive/analyze_table_null_partition.q.out
index 324b700497..d48df759c7 100644
--- ql/src/test/results/clientpositive/analyze_table_null_partition.q.out
+++ ql/src/test/results/clientpositive/analyze_table_null_partition.q.out
@@ -101,7 +101,7 @@ STAGE PLANS:
 partition values:
 age 15
 properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"name":"true"}}
 bucket_count -1
 column.name.delimiter ,
 columns name
@@ -146,7 +146,7 @@ STAGE PLANS:
 partition values:
 age 30
 properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"name":"true"}}
 bucket_count -1
 column.name.delimiter ,
 columns name
@@ -191,7 +191,7 @@ STAGE PLANS:
 partition values:
 age 40
 properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"name":"true"}}
 bucket_count -1
 column.name.delimiter ,
 columns name
@@ -236,7 +236,7 @@ STAGE PLANS:
 partition values:
 age __HIVE_DEFAULT_PARTITION__
 properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"name":"true"}}
 bucket_count -1
 column.name.delimiter ,
 columns name
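Note: the flag is tracked per partition, and analyze_table_null_partition.q.out confirms it covers the default partition too: even __HIVE_DEFAULT_PARTITION__ gets COLUMN_STATS {"name":"true"}. A sketch of the scenario (illustrative names; assumes a dynamic-partition insert like the one the test performs):

    set hive.exec.dynamic.partition.mode=nonstrict;
    insert overwrite table test1 partition (age) select name, age from src_table;
    -- rows with NULL age land in __HIVE_DEFAULT_PARTITION__ and still get column stats
    describe formatted test1 partition (age='__HIVE_DEFAULT_PARTITION__');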
diff --git ql/src/test/results/clientpositive/annotate_stats_filter.q.out ql/src/test/results/clientpositive/annotate_stats_filter.q.out
index 20f870ea94..ae300aaab9 100644
--- ql/src/test/results/clientpositive/annotate_stats_filter.q.out
+++ ql/src/test/results/clientpositive/annotate_stats_filter.q.out
@@ -66,11 +66,11 @@ STAGE PLANS:
 Processor Tree:
 TableScan
 alias: loc_orc
- Statistics: Num rows: 8 Data size: 1600 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
 outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 8 Data size: 1600 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE
 ListSink
 PREHOOK: query: explain select * from loc_orc where state='OH'
@@ -87,17 +87,17 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: loc_orc
- Statistics: Num rows: 8 Data size: 1600 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE
 Filter Operator
 predicate: (state = 'OH') (type: boolean)
- Statistics: Num rows: 8 Data size: 1600 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: 'OH' (type: string), locid (type: int), zip (type: bigint), year (type: int)
 outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 8 Data size: 1600 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 8 Data size: 1600 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
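Note: annotate_stats_filter.q.out shows the planner-side payoff of autogathered stats. With Column stats: COMPLETE, the scan's size comes from per-column average widths (816 bytes / 8 rows = 102 bytes per row), and the equality predicate is estimated as numRows / NDV(state) = 8 / 8 = 1 row, i.e. 102 bytes, assuming the test table's eight distinct states. With Column stats: NONE, the filter previously passed the full 8-row, 1600-byte estimate through unchanged.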
diff --git ql/src/test/results/clientpositive/annotate_stats_groupby.q.out ql/src/test/results/clientpositive/annotate_stats_groupby.q.out
index 4e67841080..ed3d5942eb 100644
--- ql/src/test/results/clientpositive/annotate_stats_groupby.q.out
+++ ql/src/test/results/clientpositive/annotate_stats_groupby.q.out
@@ -66,11 +66,11 @@ STAGE PLANS:
 Processor Tree:
 TableScan
 alias: loc_orc
- Statistics: Num rows: 8 Data size: 1600 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
 outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 8 Data size: 1600 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE
 ListSink
 PREHOOK: query: analyze table loc_orc compute statistics for columns state
@@ -108,22 +108,22 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: loc_orc
- Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: state (type: string), locid (type: int)
 outputColumnNames: state, locid
- Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE
 Group By Operator
 aggregations: count()
 keys: state (type: string), locid (type: int)
 mode: hash
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: string), _col1 (type: int)
 sort order: ++
 Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
- Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col2 (type: bigint)
 Reduce Operator Tree:
 Group By Operator
@@ -131,13 +131,13 @@ STAGE PLANS:
 keys: KEY._col0 (type: string), KEY._col1 (type: int)
 mode: mergepartial
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE
 Group By Operator
 aggregations: min(_col1)
 keys: _col0 (type: string), _col2 (type: bigint)
 mode: hash
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
 table:
@@ -153,7 +153,7 @@ STAGE PLANS:
 key expressions: _col0 (type: string), _col1 (type: bigint)
 sort order: ++
 Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint)
- Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col2 (type: int)
 Reduce Operator Tree:
 Group By Operator
@@ -161,10 +161,10 @@ STAGE PLANS:
 keys: KEY._col0 (type: string), KEY._col1 (type: bigint)
 mode: mergepartial
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -747,30 +747,30 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: loc_orc
- Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: state (type: string), zip (type: bigint)
 outputColumnNames: state, zip
- Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: COMPLETE
 Group By Operator
 keys: state (type: string), zip (type: bigint)
 mode: hash
 outputColumnNames: _col0, _col1
- Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: string), _col1 (type: bigint)
 sort order: ++
 Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint)
- Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Operator Tree:
 Group By Operator
 keys: KEY._col0 (type: string), KEY._col1 (type: bigint)
 mode: mergepartial
 outputColumnNames: _col0, _col1
- Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/auto_join1.q.out ql/src/test/results/clientpositive/auto_join1.q.out
index c2d24739ab..2edfb864e8 100644
--- ql/src/test/results/clientpositive/auto_join1.q.out
+++ ql/src/test/results/clientpositive/auto_join1.q.out
@@ -15,13 +15,14 @@ FROM src src1 JOIN src src2 ON (src1.key = src2.key)
 INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-5 is a root stage
-  Stage-4 depends on stages: Stage-5
-  Stage-0 depends on stages: Stage-4
-  Stage-2 depends on stages: Stage-0
+  Stage-6 is a root stage
+  Stage-5 depends on stages: Stage-6
+  Stage-0 depends on stages: Stage-5
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-5
 STAGE PLANS:
-  Stage: Stage-5
+  Stage: Stage-6
 Map Reduce Local Work
 Alias -> Map Local Tables:
 $hdt$_0:src1
@@ -44,7 +45,7 @@ STAGE PLANS:
 0 _col0 (type: string)
 1 _col0 (type: string)
-  Stage: Stage-4
+  Stage: Stage-5
 Map Reduce
 Map Operator Tree:
 TableScan
@@ -77,6 +78,21 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.dest_j1
+    Select Operator
+      expressions: _col0 (type: int), _col1 (type: string)
+      outputColumnNames: key, value
+      Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+      Group By Operator
+        aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+        mode: hash
+        outputColumnNames: _col0, _col1
+        Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+        File Output Operator
+          compressed: false
+          table:
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 Local Work:
 Map Reduce Local Work
@@ -93,6 +109,32 @@ STAGE PLANS:
 Stage: Stage-2
 Stats Work
 Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest_j1
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key)
 INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value
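Note: the auto_join1.q.out shape above repeats across every INSERT OVERWRITE plan in this patch: the insert stage gains a Select/Group By pair that computes compute_stats(col, 'hll') partial aggregates next to the file sink, a new map-reduce stage merges them with mode: mergepartial, and Stats Work publishes the result, which is also why every stage number shifts by one. The same aggregate can be invoked by hand to see what those stages exchange (a sketch against the standard src test table; the returned values are structs whose exact fields are abbreviated as "struct" throughout these golden files):

    select compute_stats(key, 'hll'), compute_stats(value, 'hll') from src;

Here 'hll' selects HyperLogLog as the distinct-value estimator for the NDV component of the struct.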
diff --git ql/src/test/results/clientpositive/auto_join14.q.out ql/src/test/results/clientpositive/auto_join14.q.out
index e67506b3b8..52aaa023c5 100644
--- ql/src/test/results/clientpositive/auto_join14.q.out
+++ ql/src/test/results/clientpositive/auto_join14.q.out
@@ -15,13 +15,14 @@ FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and
 INSERT OVERWRITE TABLE dest1 SELECT src.key, srcpart.value
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-5 is a root stage
-  Stage-4 depends on stages: Stage-5
-  Stage-0 depends on stages: Stage-4
-  Stage-2 depends on stages: Stage-0
+  Stage-6 is a root stage
+  Stage-5 depends on stages: Stage-6
+  Stage-0 depends on stages: Stage-5
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-5
 STAGE PLANS:
-  Stage: Stage-5
+  Stage: Stage-6
 Map Reduce Local Work
 Alias -> Map Local Tables:
 $hdt$_0:src
@@ -44,7 +45,7 @@ STAGE PLANS:
 0 _col0 (type: string)
 1 _col0 (type: string)
-  Stage: Stage-4
+  Stage: Stage-5
 Map Reduce
 Map Operator Tree:
 TableScan
@@ -77,6 +78,21 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.dest1
+    Select Operator
+      expressions: _col0 (type: int), _col1 (type: string)
+      outputColumnNames: c1, c2
+      Statistics: Num rows: 366 Data size: 3890 Basic stats: COMPLETE Column stats: NONE
+      Group By Operator
+        aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll')
+        mode: hash
+        outputColumnNames: _col0, _col1
+        Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+        File Output Operator
+          compressed: false
+          table:
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 Local Work:
 Map Reduce Local Work
@@ -93,6 +109,32 @@ STAGE PLANS:
 Stage: Stage-2
 Stats Work
 Basic Stats Work:
+      Column Stats Desc:
+          Columns: c1, c2
+          Column Types: int, string
+          Table: default.dest1
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 PREHOOK: query: FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and
 src.key > 100
 INSERT OVERWRITE TABLE dest1 SELECT src.key, srcpart.value
diff --git ql/src/test/results/clientpositive/auto_join17.q.out ql/src/test/results/clientpositive/auto_join17.q.out
index 0239cf8285..20adc30831 100644
--- ql/src/test/results/clientpositive/auto_join17.q.out
+++ ql/src/test/results/clientpositive/auto_join17.q.out
@@ -15,13 +15,14 @@ FROM src src1 JOIN src src2 ON (src1.key = src2.key)
 INSERT OVERWRITE TABLE dest1 SELECT src1.*, src2.*
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-5 is a root stage
-  Stage-4 depends on stages: Stage-5
-  Stage-0 depends on stages: Stage-4
-  Stage-2 depends on stages: Stage-0
+  Stage-6 is a root stage
+  Stage-5 depends on stages: Stage-6
+  Stage-0 depends on stages: Stage-5
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-5
 STAGE PLANS:
-  Stage: Stage-5
+  Stage: Stage-6
 Map Reduce Local Work
 Alias -> Map Local Tables:
 $hdt$_0:src1
@@ -44,7 +45,7 @@ STAGE PLANS:
 0 _col0 (type: string)
 1 _col0 (type: string)
-  Stage: Stage-4
+  Stage: Stage-5
 Map Reduce
 Map Operator Tree:
 TableScan
@@ -77,6 +78,21 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.dest1
+    Select Operator
+      expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string)
+      outputColumnNames: key1, value1, key2, value2
+      Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+      Group By Operator
+        aggregations: compute_stats(key1, 'hll'), compute_stats(value1, 'hll'), compute_stats(key2, 'hll'), compute_stats(value2, 'hll')
+        mode: hash
+        outputColumnNames: _col0, _col1, _col2, _col3
+        Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE
+        File Output Operator
+          compressed: false
+          table:
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 Local Work:
 Map Reduce Local Work
@@ -93,6 +109,32 @@ STAGE PLANS:
 Stage: Stage-2
 Stats Work
 Basic Stats Work:
+      Column Stats Desc:
+          Columns: key1, value1, key2, value2
+          Column Types: int, string, int, string
+          Table: default.dest1
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+            table:
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key)
 INSERT OVERWRITE TABLE dest1 SELECT src1.*, src2.*
diff --git ql/src/test/results/clientpositive/auto_join19.q.out ql/src/test/results/clientpositive/auto_join19.q.out
index e70c5147fb..80615deddc 100644
--- ql/src/test/results/clientpositive/auto_join19.q.out
+++ ql/src/test/results/clientpositive/auto_join19.q.out
@@ -17,13 +17,14 @@ INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value
 where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11')
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-5 is a root stage
-  Stage-4 depends on stages: Stage-5
-  Stage-0 depends on stages: Stage-4
-  Stage-2 depends on stages: Stage-0
+  Stage-6 is a root stage
+  Stage-5 depends on stages: Stage-6
+  Stage-0 depends on stages: Stage-5
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-5
 STAGE PLANS:
-  Stage: Stage-5
+  Stage: Stage-6
 Map Reduce Local Work
 Alias -> Map Local Tables:
 $hdt$_1:src2
@@ -46,7 +47,7 @@ STAGE PLANS:
 0 _col0 (type: string)
 1 _col0 (type: string)
-  Stage: Stage-4
+  Stage: Stage-5
 Map Reduce
 Map Operator Tree:
 TableScan
@@ -79,6 +80,21 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.dest1
+    Select Operator
+      expressions: _col0 (type: int), _col1 (type: string)
+      outputColumnNames: key, value
+      Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
+      Group By Operator
+        aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+        mode: hash
+        outputColumnNames: _col0, _col1
+        Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+        File Output Operator
+          compressed: false
+          table:
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 Local Work:
 Map Reduce Local Work
@@ -95,6 +111,32 @@ STAGE PLANS:
 Stage: Stage-2
 Stats Work
 Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest1
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 PREHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key)
 INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value
diff --git ql/src/test/results/clientpositive/auto_join19_inclause.q.out ql/src/test/results/clientpositive/auto_join19_inclause.q.out
index e70c5147fb..80615deddc 100644
--- ql/src/test/results/clientpositive/auto_join19_inclause.q.out
+++ ql/src/test/results/clientpositive/auto_join19_inclause.q.out
@@ -17,13 +17,14 @@ INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value
 where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11')
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-5 is a root stage
-  Stage-4 depends on stages: Stage-5
-  Stage-0 depends on stages: Stage-4
-  Stage-2 depends on stages: Stage-0
+  Stage-6 is a root stage
+  Stage-5 depends on stages: Stage-6
+  Stage-0 depends on stages: Stage-5
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-5
 STAGE PLANS:
-  Stage: Stage-5
+  Stage: Stage-6
 Map Reduce Local Work
 Alias -> Map Local Tables:
 $hdt$_1:src2
@@ -46,7 +47,7 @@ STAGE PLANS:
 0 _col0 (type: string)
 1 _col0 (type: string)
-  Stage: Stage-4
+  Stage: Stage-5
 Map Reduce
 Map Operator Tree:
 TableScan
@@ -79,6 +80,21 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.dest1
+    Select Operator
+      expressions: _col0 (type: int), _col1 (type: string)
+      outputColumnNames: key, value
+      Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
+      Group By Operator
+        aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+        mode: hash
+        outputColumnNames: _col0, _col1
+        Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+        File Output Operator
+          compressed: false
+          table:
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 Local Work:
 Map Reduce Local Work
@@ -95,6 +111,32 @@ STAGE PLANS:
 Stage: Stage-2
 Stats Work
 Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest1
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 PREHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key)
 INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value
diff --git ql/src/test/results/clientpositive/auto_join2.q.out ql/src/test/results/clientpositive/auto_join2.q.out
index 4132caf7fe..63fc65a7b6 100644
--- ql/src/test/results/clientpositive/auto_join2.q.out
+++ ql/src/test/results/clientpositive/auto_join2.q.out
@@ -15,13 +15,14 @@ FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key
 INSERT OVERWRITE TABLE dest_j2 SELECT src1.key, src3.value
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-8 is a root stage
-  Stage-6 depends on stages: Stage-8
-  Stage-0 depends on stages: Stage-6
-  Stage-3 depends on stages: Stage-0
+  Stage-9 is a root stage
+  Stage-7 depends on stages: Stage-9
+  Stage-0 depends on stages: Stage-7
+  Stage-3 depends on stages: Stage-0, Stage-4
+  Stage-4 depends on stages: Stage-7
 STAGE PLANS:
-  Stage: Stage-8
+  Stage: Stage-9
 Map Reduce Local Work
 Alias -> Map Local Tables:
 $hdt$_0:src1
@@ -62,7 +63,7 @@ STAGE PLANS:
 0 (UDFToDouble(_col0) + UDFToDouble(_col1)) (type: double)
 1 UDFToDouble(_col0) (type: double)
-  Stage: Stage-6
+  Stage: Stage-7
 Map Reduce
 Map Operator Tree:
 TableScan
@@ -103,6 +104,21 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.dest_j2
+    Select Operator
+      expressions: _col0 (type: int), _col1 (type: string)
+      outputColumnNames: key, value
+      Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
+      Group By Operator
+        aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+        mode: hash
+        outputColumnNames: _col0, _col1
+        Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+        File Output Operator
+          compressed: false
+          table:
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 Local Work:
 Map Reduce Local Work
@@ -119,6 +135,32 @@ STAGE PLANS:
 Stage: Stage-3
 Stats Work
 Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest_j2
+
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key + src2.key = src3.key)
 INSERT OVERWRITE TABLE dest_j2 SELECT src1.key, src3.value
diff --git ql/src/test/results/clientpositive/auto_join26.q.out ql/src/test/results/clientpositive/auto_join26.q.out
index 9b457415e1..91d79857c2 100644
--- ql/src/test/results/clientpositive/auto_join26.q.out
+++ ql/src/test/results/clientpositive/auto_join26.q.out
@@ -15,13 +15,14 @@ INSERT OVERWRITE TABLE dest_j1
 SELECT x.key, count(1) FROM src1 x JOIN src y ON (x.key = y.key) group by x.key
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-6 is a root stage
-  Stage-2 depends on stages: Stage-6
+  Stage-7 is a root stage
+  Stage-2 depends on stages: Stage-7
 Stage-0 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-0, Stage-4
+  Stage-4 depends on stages: Stage-2
 STAGE PLANS:
-  Stage: Stage-6
+  Stage: Stage-7
 Map Reduce Local Work
 Alias -> Map Local Tables:
 $hdt$_0:x
@@ -98,6 +99,21 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.dest_j1
+    Select Operator
+      expressions: _col0 (type: int), _col1 (type: int)
+      outputColumnNames: key, cnt
+      Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+      Group By Operator
+        aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll')
+        mode: hash
+        outputColumnNames: _col0, _col1
+        Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+        File Output Operator
+          compressed: false
+          table:
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 Stage: Stage-0
 Move Operator
@@ -112,6 +128,32 @@ STAGE PLANS:
 Stage: Stage-3
 Stats Work
 Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, cnt
+          Column Types: int, int
+          Table: default.dest_j1
+
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 PREHOOK: query: INSERT OVERWRITE TABLE dest_j1
 SELECT x.key, count(1) FROM src1 x JOIN src y ON (x.key = y.key) group by x.key
diff --git ql/src/test/results/clientpositive/auto_join3.q.out ql/src/test/results/clientpositive/auto_join3.q.out
index 5921da093c..01ba7f662a 100644
--- ql/src/test/results/clientpositive/auto_join3.q.out
+++ ql/src/test/results/clientpositive/auto_join3.q.out
@@ -15,13 +15,14 @@ FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key
 INSERT OVERWRITE TABLE dest1 SELECT src1.key, src3.value
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-6 is a root stage
-  Stage-5 depends on stages: Stage-6
-  Stage-0 depends on stages: Stage-5
-  Stage-2 depends on stages: Stage-0
+  Stage-7 is a root stage
+  Stage-6 depends on stages: Stage-7
+  Stage-0 depends on stages: Stage-6
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-6
 STAGE PLANS:
-  Stage: Stage-6
+  Stage: Stage-7
 Map Reduce Local Work
 Alias -> Map Local Tables:
 $hdt$_0:src1
@@ -64,7 +65,7 @@ STAGE PLANS:
 1 _col0 (type: string)
 2 _col0 (type: string)
-  Stage: Stage-5
+  Stage: Stage-6
 Map Reduce
 Map Operator Tree:
 TableScan
@@ -99,6 +100,21 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.dest1
+    Select Operator
+      expressions: _col0 (type: int), _col1 (type: string)
+      outputColumnNames: key, value
+      Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
+      Group By Operator
+        aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+        mode: hash
+        outputColumnNames: _col0, _col1
+        Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+        File Output Operator
+          compressed: false
+          table:
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 Local Work:
 Map Reduce Local Work
@@ -115,6 +131,32 @@ STAGE PLANS:
 Stage: Stage-2
 Stats Work
 Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest1
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key = src3.key)
 INSERT OVERWRITE TABLE dest1 SELECT src1.key, src3.value
diff --git ql/src/test/results/clientpositive/auto_join4.q.out ql/src/test/results/clientpositive/auto_join4.q.out
index fc34040d24..4b08f937e3 100644
--- ql/src/test/results/clientpositive/auto_join4.q.out
+++ ql/src/test/results/clientpositive/auto_join4.q.out
@@ -37,13 +37,14 @@ FROM (
 INSERT OVERWRITE TABLE dest1 SELECT c.c1, c.c2, c.c3, c.c4
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-5 is a root stage
-  Stage-4 depends on stages: Stage-5
-  Stage-0 depends on stages: Stage-4
-  Stage-2 depends on stages: Stage-0
+  Stage-6 is a root stage
+  Stage-5 depends on stages: Stage-6
+  Stage-0 depends on stages: Stage-5
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-5
 STAGE PLANS:
-  Stage: Stage-5
+  Stage: Stage-6
 Map Reduce Local Work
 Alias -> Map Local Tables:
 $hdt$_1:src2
@@ -66,7 +67,7 @@ STAGE PLANS:
 0 _col0 (type: string)
 1 _col0 (type: string)
-  Stage: Stage-4
+  Stage: Stage-5
 Map Reduce
 Map Operator Tree:
 TableScan
@@ -99,6 +100,21 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.dest1
+    Select Operator
+      expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string)
+      outputColumnNames: c1, c2, c3, c4
+      Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE
+      Group By Operator
+        aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll')
+        mode: hash
+        outputColumnNames: _col0, _col1, _col2, _col3
+        Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE
+        File Output Operator
+          compressed: false
+          table:
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 Local Work:
 Map Reduce Local Work
@@ -115,6 +131,32 @@ STAGE PLANS:
 Stage: Stage-2
 Stats Work
 Basic Stats Work:
+      Column Stats Desc:
+          Columns: c1, c2, c3, c4
+          Column Types: int, string, int, string
+          Table: default.dest1
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+            table:
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 PREHOOK: query: FROM (
 FROM
diff --git ql/src/test/results/clientpositive/auto_join5.q.out ql/src/test/results/clientpositive/auto_join5.q.out
index fe4e24e80d..03262fecde 100644
--- ql/src/test/results/clientpositive/auto_join5.q.out
+++ ql/src/test/results/clientpositive/auto_join5.q.out
@@ -37,13 +37,14 @@ FROM (
 INSERT OVERWRITE TABLE dest1 SELECT c.c1, c.c2, c.c3, c.c4
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-5 is a root stage
-  Stage-4 depends on stages: Stage-5
-  Stage-0 depends on stages: Stage-4
-  Stage-2 depends on stages: Stage-0
+  Stage-6 is a root stage
+  Stage-5 depends on stages: Stage-6
+  Stage-0 depends on stages: Stage-5
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-5
 STAGE PLANS:
-  Stage: Stage-5
+  Stage: Stage-6
 Map Reduce Local Work
 Alias -> Map Local Tables:
 $hdt$_0:src1
@@ -66,7 +67,7 @@ STAGE PLANS:
 0 _col0 (type: string)
 1 _col0 (type: string)
-  Stage: Stage-4
+  Stage: Stage-5
 Map Reduce
 Map Operator Tree:
 TableScan
@@ -99,6 +100,21 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.dest1
+    Select Operator
+      expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string)
+      outputColumnNames: c1, c2, c3, c4
+      Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE
+      Group By Operator
+        aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll')
+        mode: hash
+        outputColumnNames: _col0, _col1, _col2, _col3
+        Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE
+        File Output Operator
+          compressed: false
+          table:
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 Local Work:
 Map Reduce Local Work
@@ -115,6 +131,32 @@ STAGE PLANS:
 Stage: Stage-2
 Stats Work
 Basic Stats Work:
+      Column Stats Desc:
+          Columns: c1, c2, c3, c4
+          Column Types: int, string, int, string
+          Table: default.dest1
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+            table:
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 PREHOOK: query: FROM (
 FROM
diff --git ql/src/test/results/clientpositive/auto_join6.q.out ql/src/test/results/clientpositive/auto_join6.q.out
index 594e7a44e4..d8c58d44f9 100644
--- ql/src/test/results/clientpositive/auto_join6.q.out
+++ ql/src/test/results/clientpositive/auto_join6.q.out
@@ -39,7 +39,8 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
 Stage-1 is a root stage
 Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1
 STAGE PLANS:
 Stage: Stage-1
@@ -98,6 +99,21 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.dest1
+    Select Operator
+      expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string)
+      outputColumnNames: c1, c2, c3, c4
+      Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE
+      Group By Operator
+        aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll')
+        mode: hash
+        outputColumnNames: _col0, _col1, _col2, _col3
+        Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE
+        File Output Operator
+          compressed: false
+          table:
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 Stage: Stage-0
 Move Operator
@@ -112,6 +128,32 @@ STAGE PLANS:
 Stage: Stage-2
 Stats Work
 Basic Stats Work:
+      Column Stats Desc:
+          Columns: c1, c2, c3, c4
+          Column Types: int, string, int, string
+          Table: default.dest1
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+            table:
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 PREHOOK: query: FROM (
 FROM
diff --git ql/src/test/results/clientpositive/auto_join7.q.out ql/src/test/results/clientpositive/auto_join7.q.out
index 13a5ff507d..b426b8ceee 100644
--- ql/src/test/results/clientpositive/auto_join7.q.out
+++ ql/src/test/results/clientpositive/auto_join7.q.out
@@ -49,7 +49,8 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
 Stage-1 is a root stage
 Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1
 STAGE PLANS:
 Stage: Stage-1
@@ -126,6 +127,21 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.dest1
+    Select Operator
+      expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: int), _col5 (type: string)
+      outputColumnNames: c1, c2, c3, c4, c5, c6
+      Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE
+      Group By Operator
+        aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll')
+        mode: hash
+        outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+        Statistics: Num rows: 1 Data size: 2592 Basic stats: COMPLETE Column stats: NONE
+        File Output Operator
+          compressed: false
+          table:
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 Stage: Stage-0
 Move Operator
@@ -140,6 +156,32 @@ STAGE PLANS:
 Stage: Stage-2
 Stats Work
 Basic Stats Work:
+      Column Stats Desc:
+          Columns: c1, c2, c3, c4, c5, c6
+          Column Types: int, string, int, string, int, string
+          Table: default.dest1
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 2592 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+          Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: NONE
+            table:
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 PREHOOK: query: FROM (
 FROM
diff --git ql/src/test/results/clientpositive/auto_join8.q.out ql/src/test/results/clientpositive/auto_join8.q.out
index 704ce3c31d..f4544ad3b9 100644
--- ql/src/test/results/clientpositive/auto_join8.q.out
+++ ql/src/test/results/clientpositive/auto_join8.q.out
@@ -37,13 +37,14 @@ FROM (
 INSERT OVERWRITE TABLE dest1 SELECT c.c1, c.c2, c.c3, c.c4 where c.c3 IS NULL AND c.c1 IS NOT NULL
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-5 is a root stage
-  Stage-4 depends on stages: Stage-5
-  Stage-0 depends on stages: Stage-4
-  Stage-2 depends on stages: Stage-0
+  Stage-6 is a root stage
+  Stage-5 depends on stages: Stage-6
+  Stage-0 depends on stages: Stage-5
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-5
 STAGE PLANS:
-  Stage: Stage-5
+  Stage: Stage-6
 Map Reduce Local Work
 Alias -> Map Local Tables:
 $hdt$_1:src2
@@ -66,7 +67,7 @@ STAGE PLANS:
 0 _col0 (type: string)
 1 _col0 (type: string)
-  Stage: Stage-4
+  Stage: Stage-5
 Map Reduce
 Map Operator Tree:
 TableScan
@@ -102,6 +103,21 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.dest1
+    Select Operator
+      expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string)
+      outputColumnNames: c1, c2, c3, c4
+      Statistics: Num rows: 30 Data size: 321 Basic stats: COMPLETE Column stats: NONE
+      Group By Operator
+        aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll')
+        mode: hash
+        outputColumnNames: _col0, _col1, _col2, _col3
+        Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE
+        File Output Operator
+          compressed: false
+          table:
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 Local Work:
 Map Reduce Local Work
@@ -118,6 +134,32 @@ STAGE PLANS:
 Stage: Stage-2
 Stats Work
 Basic Stats Work:
+      Column Stats Desc:
+          Columns: c1, c2, c3, c4
+          Column Types: int, string, int, string
+          Table: default.dest1
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+            table:
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 PREHOOK: query: FROM (
 FROM
diff --git ql/src/test/results/clientpositive/auto_join9.q.out ql/src/test/results/clientpositive/auto_join9.q.out
index e5022a3d29..b4aa03ea1a 100644
--- ql/src/test/results/clientpositive/auto_join9.q.out
+++ ql/src/test/results/clientpositive/auto_join9.q.out
@@ -15,13 +15,14 @@ FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key)
 INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value where src1.ds = '2008-04-08' and src1.hr = '12'
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-5 is a root stage
-  Stage-4 depends on stages: Stage-5
-  Stage-0 depends on stages: Stage-4
-  Stage-2 depends on stages: Stage-0
+  Stage-6 is a root stage
+  Stage-5 depends on stages: Stage-6
+  Stage-0 depends on stages: Stage-5
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-5
 STAGE PLANS:
-  Stage: Stage-5
+  Stage: Stage-6
 Map Reduce Local Work
 Alias -> Map Local Tables:
 $hdt$_0:src1
@@ -44,7 +45,7 @@ STAGE PLANS:
 0 _col0 (type: string)
 1 _col0 (type: string)
-  Stage: Stage-4
+  Stage: Stage-5
 Map Reduce
 Map Operator Tree:
 TableScan
@@ -77,6 +78,21 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.dest1
+    Select Operator
+      expressions: _col0 (type: int), _col1 (type: string)
+      outputColumnNames: key, value
+      Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+      Group By Operator
+        aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+        mode: hash
+        outputColumnNames: _col0, _col1
+        Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+        File Output Operator
+          compressed: false
+          table:
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 Local Work:
 Map Reduce Local Work
@@ -93,6 +109,32 @@ STAGE PLANS:
 Stage: Stage-2
 Stats Work
 Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest1
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 PREHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key)
 INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value where src1.ds = '2008-04-08' and src1.hr = '12'
PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}} bucket_count -1 column.name.delimiter , columns dealid,date,time,cityid,userid @@ -435,7 +435,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}} bucket_count -1 column.name.delimiter , columns dealid,date,time,cityid,userid @@ -551,7 +551,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"userid":"true"}} bucket_count -1 column.name.delimiter , columns userid @@ -572,7 +572,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"userid":"true"}} bucket_count -1 column.name.delimiter , columns userid diff --git ql/src/test/results/clientpositive/basicstat_partval.q.out ql/src/test/results/clientpositive/basicstat_partval.q.out index fa5c5baf8e..1e46d5825d 100644 --- ql/src/test/results/clientpositive/basicstat_partval.q.out +++ ql/src/test/results/clientpositive/basicstat_partval.q.out @@ -77,7 +77,7 @@ Database: default Table: p1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"i\":\"true\"}} numFiles 1 numRows 1 rawDataSize 1 @@ -113,7 +113,7 @@ Database: default Table: p1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"i\":\"true\"}} numFiles 1 numRows 2 rawDataSize 2 diff --git ql/src/test/results/clientpositive/beeline/smb_mapjoin_11.q.out ql/src/test/results/clientpositive/beeline/smb_mapjoin_11.q.out index b76e7a8221..0312b218d3 100644 --- ql/src/test/results/clientpositive/beeline/smb_mapjoin_11.q.out +++ ql/src/test/results/clientpositive/beeline/smb_mapjoin_11.q.out @@ -49,7 +49,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -93,7 +94,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 16 bucket_field_name key column.name.delimiter , @@ -172,6 +173,32 @@ STAGE PLANS: TotalFiles: 16 GatherStats: true MultiFileSpray: true + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: '1' 
(type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -205,6 +232,83 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: test_db_smb_mapjoin_11.test_table3 + Is Table Level Stats: false + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + key expressions: '1' (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: '1' (type: string) + tag: -1 + value expressions: _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: '1' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), '1' (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT /*+ MAPJOIN(b) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key 
AND a.ds = '1' AND b.ds = '1' PREHOOK: type: QUERY @@ -1828,7 +1932,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 16 bucket_field_name key column.name.delimiter , @@ -1937,7 +2041,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 16 bucket_field_name key column.name.delimiter , diff --git ql/src/test/results/clientpositive/beeline/smb_mapjoin_12.q.out ql/src/test/results/clientpositive/beeline/smb_mapjoin_12.q.out index 6def3de813..857eb709dd 100644 --- ql/src/test/results/clientpositive/beeline/smb_mapjoin_12.q.out +++ ql/src/test/results/clientpositive/beeline/smb_mapjoin_12.q.out @@ -129,7 +129,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 16 bucket_field_name key column.name.delimiter , @@ -207,6 +207,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: test_db_smb_mapjoin_12.test_table3 + Is Table Level Stats: false PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT /*+ MAPJOIN(b) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key AND a.ds = '1' AND b.ds >= '1' PREHOOK: type: QUERY @@ -254,7 +259,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -299,7 +305,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 16 bucket_field_name key column.name.delimiter , @@ -379,6 +385,32 @@ STAGE PLANS: TotalFiles: 16 GatherStats: true MultiFileSpray: true + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: '2' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -413,6 +445,83 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: test_db_smb_mapjoin_12.test_table3 + Is Table Level Stats: false + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + key expressions: '2' (type: 
string) + null sort order: a + sort order: + + Map-reduce partition columns: '2' (type: string) + tag: -1 + value expressions: _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: '2' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), '2' (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '2') SELECT /*+mapjoin(b)*/ a.key, concat(a.value, b.value) FROM test_table3 a JOIN test_table1 b ON a.key = b.key AND a.ds = '1' AND b.ds='1' diff --git ql/src/test/results/clientpositive/beeline/smb_mapjoin_13.q.out ql/src/test/results/clientpositive/beeline/smb_mapjoin_13.q.out index 49ff6355b3..3bc00c8973 100644 --- ql/src/test/results/clientpositive/beeline/smb_mapjoin_13.q.out +++ ql/src/test/results/clientpositive/beeline/smb_mapjoin_13.q.out @@ -112,7 +112,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -135,7 +135,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 
16 bucket_field_name key @@ -292,7 +292,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -315,7 +315,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key diff --git ql/src/test/results/clientpositive/beeline/smb_mapjoin_7.q.out ql/src/test/results/clientpositive/beeline/smb_mapjoin_7.q.out index a2a8660c3b..7a6f8c53a5 100644 --- ql/src/test/results/clientpositive/beeline/smb_mapjoin_7.q.out +++ ql/src/test/results/clientpositive/beeline/smb_mapjoin_7.q.out @@ -648,6 +648,27 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.smb_join_results + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: k1, v1, k2, v2 + Group By Operator + aggregations: compute_stats(k1, 'hll'), compute_stats(v1, 'hll'), compute_stats(k2, 'hll'), compute_stats(v2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Reduce Output Operator + sort order: + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -671,6 +692,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: k1, v1, k2, v2 + Column Types: int, string, int, string + Table: test_db_smb_mapjoin_7.smb_join_results Stage: Stage-4 Map Reduce diff --git ql/src/test/results/clientpositive/binary_output_format.q.out ql/src/test/results/clientpositive/binary_output_format.q.out index ce85351c91..2f72ae9769 100644 --- ql/src/test/results/clientpositive/binary_output_format.q.out +++ ql/src/test/results/clientpositive/binary_output_format.q.out @@ -117,6 +117,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string) + outputColumnNames: mydata + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(mydata, 'hll') + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was 
here #### Path -> Partition: @@ -168,6 +184,35 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [src] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0 + columns.types struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -211,6 +256,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: mydata + Column Types: string + Table: default.dest1 + Is Table Level Stats: true Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/bucket2.q.out ql/src/test/results/clientpositive/bucket2.q.out index 4bee31b05c..c57769e266 100644 --- ql/src/test/results/clientpositive/bucket2.q.out +++ ql/src/test/results/clientpositive/bucket2.q.out @@ -129,6 +129,41 @@ STAGE PLANS: TotalFiles: 2 GatherStats: true MultiFileSpray: true + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -163,6 +198,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.bucket2_1 + Is Table Level Stats: true PREHOOK: query: insert overwrite table bucket2_1 select * from src diff --git ql/src/test/results/clientpositive/bucket3.q.out 
ql/src/test/results/clientpositive/bucket3.q.out index 49f12f7fe9..a2109ee208 100644 --- ql/src/test/results/clientpositive/bucket3.q.out +++ ql/src/test/results/clientpositive/bucket3.q.out @@ -17,7 +17,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -127,6 +128,34 @@ STAGE PLANS: TotalFiles: 2 GatherStats: true MultiFileSpray: true + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -160,6 +189,87 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.bucket3_1 + Is Table Level Stats: false + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, 
_col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: insert overwrite table bucket3_1 partition (ds='1') select * from src diff --git ql/src/test/results/clientpositive/bucket_map_join_spark1.q.out ql/src/test/results/clientpositive/bucket_map_join_spark1.q.out index f210c5a4d2..3545f865a2 100644 --- ql/src/test/results/clientpositive/bucket_map_join_spark1.q.out +++ ql/src/test/results/clientpositive/bucket_map_join_spark1.q.out @@ -117,13 +117,14 @@ from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b on a.key=b.key and b.ds="2008-04-08" POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-0 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:a @@ -197,7 +198,7 @@ STAGE PLANS: 1 _col0 (type: int) Position of Big Table: 1 - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -257,6 +258,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -393,6 +421,80 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked 
pattern was here #### + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select a.key, a.value, b.value @@ -452,13 +554,14 @@ from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b on a.key=b.key and b.ds="2008-04-08" POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-0 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:a @@ -532,7 +635,7 @@ STAGE PLANS: 1 _col0 (type: int) Position of Big Table: 1 - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -571,7 +674,7 @@ STAGE PLANS: input format: 
org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -592,6 +695,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -705,7 +835,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -728,6 +858,80 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path 
-> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select a.key, a.value, b.value diff --git ql/src/test/results/clientpositive/bucket_map_join_spark2.q.out ql/src/test/results/clientpositive/bucket_map_join_spark2.q.out index c6d607916f..71a9f2fdba 100644 --- ql/src/test/results/clientpositive/bucket_map_join_spark2.q.out +++ ql/src/test/results/clientpositive/bucket_map_join_spark2.q.out @@ -101,13 +101,14 @@ from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b on a.key=b.key and b.ds="2008-04-08" POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-0 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:b @@ -181,7 +182,7 @@ STAGE PLANS: 1 _col0 (type: int) Position of Big Table: 0 - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -241,6 +242,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -377,6 +405,80 @@ STAGE PLANS: 
Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select a.key, a.value, b.value @@ -436,13 +538,14 @@ from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b on a.key=b.key and b.ds="2008-04-08" POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-0 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:b @@ -516,7 +619,7 @@ STAGE PLANS: 1 _col0 (type: int) Position of Big Table: 0 - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ 
-555,7 +658,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -576,6 +679,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -689,7 +819,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -712,6 +842,80 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select a.key, a.value, b.value diff --git ql/src/test/results/clientpositive/bucket_map_join_spark3.q.out ql/src/test/results/clientpositive/bucket_map_join_spark3.q.out index 52c17ccd32..eb9b1d537c 100644 --- ql/src/test/results/clientpositive/bucket_map_join_spark3.q.out +++ ql/src/test/results/clientpositive/bucket_map_join_spark3.q.out @@ -101,13 +101,14 @@ from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b on a.key=b.key and b.ds="2008-04-08" POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-0 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:a @@ -181,7 +182,7 @@ STAGE PLANS: 1 _col0 (type: int) Position of Big Table: 1 - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -241,6 +242,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local 
Work: Map Reduce Local Work Path -> Alias: @@ -377,6 +405,80 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select a.key, a.value, b.value @@ -436,13 +538,14 @@ from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b on a.key=b.key and b.ds="2008-04-08" POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-0 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:a @@ -516,7 +619,7 @@ STAGE PLANS: 1 _col0 (type: int) Position of Big Table: 1 - 
Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -555,7 +658,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -576,6 +679,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -689,7 +819,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -712,6 +842,80 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select a.key, a.value, b.value diff --git ql/src/test/results/clientpositive/bucket_map_join_spark4.q.out ql/src/test/results/clientpositive/bucket_map_join_spark4.q.out index 5743944b4c..688fdfa125 100644 --- ql/src/test/results/clientpositive/bucket_map_join_spark4.q.out +++ ql/src/test/results/clientpositive/bucket_map_join_spark4.q.out @@ -189,7 +189,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -212,7 +212,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -239,7 +239,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -262,7 +262,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -289,7 +289,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE 
bucket_count 2 bucket_field_name key @@ -312,7 +312,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -545,7 +545,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -568,7 +568,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -595,7 +595,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -618,7 +618,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -645,7 +645,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -668,7 +668,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key diff --git ql/src/test/results/clientpositive/bucketmapjoin13.q.out ql/src/test/results/clientpositive/bucketmapjoin13.q.out index 71b2924af2..406cca455a 100644 --- ql/src/test/results/clientpositive/bucketmapjoin13.q.out +++ ql/src/test/results/clientpositive/bucketmapjoin13.q.out @@ -91,7 +91,7 @@ STAGE PLANS: partition values: part 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -192,7 +192,7 @@ STAGE PLANS: partition values: part 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name value column.name.delimiter , @@ -241,7 +241,7 @@ STAGE PLANS: partition values: part 2 
properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -371,7 +371,7 @@ STAGE PLANS: partition values: part 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -480,7 +480,7 @@ STAGE PLANS: partition values: part 2 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -615,7 +615,7 @@ STAGE PLANS: partition values: part 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -724,7 +724,7 @@ STAGE PLANS: partition values: part 2 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -859,7 +859,7 @@ STAGE PLANS: partition values: part 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -968,7 +968,7 @@ STAGE PLANS: partition values: part 2 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , diff --git ql/src/test/results/clientpositive/bucketmapjoin5.q.out ql/src/test/results/clientpositive/bucketmapjoin5.q.out index 1096912a26..21937f4d5a 100644 --- ql/src/test/results/clientpositive/bucketmapjoin5.q.out +++ ql/src/test/results/clientpositive/bucketmapjoin5.q.out @@ -272,6 +272,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 121 Data size: 12786 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -376,6 +392,35 @@ STAGE PLANS: Truncated Path -> Alias: /srcbucket_mapjoin_part/ds=2008-04-08 [b] /srcbucket_mapjoin_part/ds=2008-04-09 [b] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + 
NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-8 Conditional Operator @@ -418,6 +463,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true Stage: Stage-4 Map Reduce @@ -793,7 +843,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -814,6 +864,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 63 Data size: 6736 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -918,6 +984,35 @@ STAGE PLANS: Truncated Path -> Alias: /srcbucket_mapjoin_part_2/ds=2008-04-08 [b] /srcbucket_mapjoin_part_2/ds=2008-04-09 [b] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false 
Stage: Stage-8 Conditional Operator @@ -937,7 +1032,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -960,6 +1055,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true Stage: Stage-4 Map Reduce @@ -975,7 +1075,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -1005,7 +1105,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -1026,7 +1126,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -1062,7 +1162,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -1092,7 +1192,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -1113,7 +1213,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 diff --git ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out index 15286edcb2..f6652b8400 100644 --- ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out +++ ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out @@ -212,6 +212,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: 
string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 44 Data size: 4620 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -265,6 +281,35 @@ STAGE PLANS: name: default.srcbucket_mapjoin Truncated Path -> Alias: /srcbucket_mapjoin [a] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -307,6 +352,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/bucketmapjoin_negative2.q.out ql/src/test/results/clientpositive/bucketmapjoin_negative2.q.out index 3c171d6577..095d559acd 100644 --- ql/src/test/results/clientpositive/bucketmapjoin_negative2.q.out +++ ql/src/test/results/clientpositive/bucketmapjoin_negative2.q.out @@ -275,6 +275,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 63 Data size: 6736 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -328,6 +344,35 @@ STAGE PLANS: name: 
default.srcbucket_mapjoin Truncated Path -> Alias: /srcbucket_mapjoin [a] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -370,6 +415,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/bucketsortoptimize_insert_1.q.out ql/src/test/results/clientpositive/bucketsortoptimize_insert_1.q.out index 2d6bd6f709..b59c4bc90a 100644 --- ql/src/test/results/clientpositive/bucketsortoptimize_insert_1.q.out +++ ql/src/test/results/clientpositive/bucketsortoptimize_insert_1.q.out @@ -84,6 +84,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT x.key, x.value from @@ -190,6 +194,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT * from @@ -296,6 +304,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 PREHOOK: query: EXPLAIN INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') @@ -314,7 +326,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -346,6 +359,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column 
stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -362,6 +391,39 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: EXPLAIN INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') @@ -417,4 +479,8 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 diff --git ql/src/test/results/clientpositive/bucketsortoptimize_insert_3.q.out ql/src/test/results/clientpositive/bucketsortoptimize_insert_3.q.out index 7eb36bfe33..52ef3dbb29 100644 --- ql/src/test/results/clientpositive/bucketsortoptimize_insert_3.q.out +++ ql/src/test/results/clientpositive/bucketsortoptimize_insert_3.q.out @@ -80,6 +80,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: value, key + Column Types: string, int + Table: default.test_table2 PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT x.value, x.key from @@ -151,7 +155,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -183,6 +188,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), '1' (type: string) + outputColumnNames: value, key, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(value, 'hll'), compute_stats(key, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -199,6 +220,39 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: value, key + Column Types: string, int + Table: default.test_table2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT x.key, x.value from diff --git ql/src/test/results/clientpositive/bucketsortoptimize_insert_4.q.out ql/src/test/results/clientpositive/bucketsortoptimize_insert_4.q.out index 7efb7cef02..eaf85c384e 100644 --- ql/src/test/results/clientpositive/bucketsortoptimize_insert_4.q.out +++ ql/src/test/results/clientpositive/bucketsortoptimize_insert_4.q.out @@ -65,39 +65,13 @@ FROM test_table1 a JOIN test_table2 b ON a.key = b.key WHERE a.ds = '1' and b.ds = '1' POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-6 is a root stage , consists of Stage-7, Stage-8, Stage-1 - Stage-7 has a backup stage: Stage-1 - Stage-4 depends on stages: Stage-7 - Stage-0 depends on stages: Stage-1, Stage-4, Stage-5 - Stage-2 depends on stages: Stage-0 - Stage-8 has a backup stage: Stage-1 - Stage-5 depends on stages: Stage-8 - Stage-1 + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-6 - Conditional Operator - - Stage: Stage-7 - Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 key (type: int) - - Stage: Stage-4 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan @@ -106,7 +80,7 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 keys: @@ -121,8 +95,6 @@ STAGE PLANS: sort 
order: + Map-reduce partition columns: _col1 (type: int) value expressions: _col2 (type: string) - Local Work: - Map Reduce Local Work Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) @@ -134,6 +106,20 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + outputColumnNames: key, key2, value + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(key2, 'hll'), compute_stats(value, 'hll') + keys: '1' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -150,99 +136,35 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, key2, value + Column Types: int, int, string + Table: default.test_table3 - Stage: Stage-8 - Map Reduce Local Work - Alias -> Map Local Tables: - a - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - a - TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 key (type: int) - - Stage: Stage-5 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col7 - Select Operator - expressions: _col0 (type: int), concat(_col1, _col7) (type: string) - outputColumnNames: _col1, _col2 - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - value expressions: _col2 (type: string) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: '1' (type: string) + sort order: + + Map-reduce partition columns: '1' (type: string) + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table3 - - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join 
Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col7 - Select Operator - expressions: _col0 (type: int), concat(_col1, _col7) (type: string) - outputColumnNames: _col1, _col2 - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - value expressions: _col2 (type: string) - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table3 + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: '1' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), '1' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, a.key, concat(a.value, b.value) @@ -340,43 +262,13 @@ FROM test_table1 a JOIN test_table2 b ON a.key = b.key WHERE a.ds = '1' and b.ds = '1' POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-6 is a root stage , consists of Stage-7, Stage-8, Stage-1 - Stage-7 has a backup stage: Stage-1 - Stage-4 depends on stages: Stage-7 - Stage-0 depends on stages: Stage-1, Stage-4, Stage-5 - Stage-2 depends on stages: Stage-0 - Stage-8 has a backup stage: Stage-1 - Stage-5 depends on stages: Stage-8 - Stage-1 + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-6 - Conditional Operator - - Stage: Stage-7 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:b - TableScan - alias: b - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - - Stage: Stage-4 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan @@ -389,7 +281,7 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 keys: @@ -401,8 +293,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: string) value expressions: _col0 (type: int) - Local Work: - Map Reduce Local Work Reduce Operator Tree: Select Operator expressions: 
VALUE._col0 (type: int), KEY.reducesinkkey0 (type: string) @@ -414,6 +304,20 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -430,105 +334,35 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 - Stage: Stage-8 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_0:a - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_0:a - TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - - Stage: Stage-5 - Map Reduce - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - value expressions: _col0 (type: int) - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: int), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table3 - - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 
(type: int) - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - value expressions: _col0 (type: int) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: struct), _col2 (type: struct) Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: int), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table3 + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, a.value diff --git ql/src/test/results/clientpositive/bucketsortoptimize_insert_5.q.out ql/src/test/results/clientpositive/bucketsortoptimize_insert_5.q.out index 6c10249dea..661114d3a6 100644 --- ql/src/test/results/clientpositive/bucketsortoptimize_insert_5.q.out +++ ql/src/test/results/clientpositive/bucketsortoptimize_insert_5.q.out @@ -65,43 +65,13 @@ FROM test_table1 a JOIN test_table2 b ON a.key = b.key WHERE a.ds = '1' and b.ds = '1' POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-6 is a root stage , consists of Stage-7, Stage-8, Stage-1 - Stage-7 has a backup stage: Stage-1 - Stage-4 depends on stages: Stage-7 - Stage-0 depends on stages: Stage-1, Stage-4, Stage-5 - Stage-2 depends on stages: Stage-0 - Stage-8 has a backup stage: Stage-1 - Stage-5 depends on stages: Stage-8 - Stage-1 + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-6 - Conditional Operator - - Stage: Stage-7 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:b - TableScan - alias: b - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - - Stage: Stage-4 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan @@ -114,7 +84,7 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 keys: 
@@ -129,8 +99,6 @@ STAGE PLANS: sort order: - Map-reduce partition columns: _col0 (type: int) value expressions: _col1 (type: string) - Local Work: - Map Reduce Local Work Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) @@ -142,6 +110,20 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -158,111 +140,35 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 - Stage: Stage-8 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_0:a - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_0:a - TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - - Stage: Stage-5 - Map Reduce - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col4 - Select Operator - expressions: _col0 (type: int), concat(_col1, _col4) (type: string) - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: - - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: string) - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table3 - - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - 
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col4 - Select Operator - expressions: _col0 (type: int), concat(_col1, _col4) (type: string) - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: - - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: struct), _col2 (type: struct) Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table3 + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.value, b.value) @@ -347,43 +253,13 @@ JOIN ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-6 is a root stage , consists of Stage-7, Stage-8, Stage-1 - Stage-7 has a backup stage: Stage-1 - Stage-4 depends on stages: Stage-7 - Stage-0 depends on stages: Stage-1, Stage-4, Stage-5 - Stage-2 depends on stages: Stage-0 - Stage-8 has a backup stage: Stage-1 - Stage-5 depends on stages: Stage-8 - Stage-1 + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-6 - Conditional Operator - - Stage: Stage-7 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:test_table2 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:test_table2 - TableScan - alias: test_table2 - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - - Stage: Stage-4 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan @@ -396,7 +272,7 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: 
Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 keys: @@ -411,8 +287,6 @@ STAGE PLANS: sort order: - Map-reduce partition columns: _col0 (type: int) value expressions: _col1 (type: string) - Local Work: - Map Reduce Local Work Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) @@ -424,6 +298,20 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -440,111 +328,35 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 - Stage: Stage-8 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_0:test_table1 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_0:test_table1 - TableScan - alias: test_table1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - - Stage: Stage-5 - Map Reduce - Map Operator Tree: - TableScan - alias: test_table2 - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3 - Select Operator - expressions: _col0 (type: int), concat(_col1, _col3) (type: string) - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: - - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: string) - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table3 - - Stage: Stage-1 + Stage: Stage-3 
Map Reduce Map Operator Tree: TableScan - alias: test_table1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3 - Select Operator - expressions: _col0 (type: int), concat(_col1, _col3) (type: string) - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: - - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: struct), _col2 (type: struct) Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table3 + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.value, b.value) diff --git ql/src/test/results/clientpositive/bucketsortoptimize_insert_8.q.out ql/src/test/results/clientpositive/bucketsortoptimize_insert_8.q.out index 7f56cf3836..dae491ac48 100644 --- ql/src/test/results/clientpositive/bucketsortoptimize_insert_8.q.out +++ ql/src/test/results/clientpositive/bucketsortoptimize_insert_8.q.out @@ -65,43 +65,13 @@ FROM test_table1 a JOIN test_table2 b ON a.key = b.key WHERE a.ds = '1' and b.ds = '1' POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-6 is a root stage , consists of Stage-7, Stage-8, Stage-1 - Stage-7 has a backup stage: Stage-1 - Stage-4 depends on stages: Stage-7 - Stage-0 depends on stages: Stage-1, Stage-4, Stage-5 - Stage-2 depends on stages: Stage-0 - Stage-8 has a backup stage: Stage-1 - Stage-5 depends on stages: Stage-8 - Stage-1 + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-6 - Conditional Operator - - Stage: Stage-7 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:b - TableScan - alias: b - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null 
(type: boolean) - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - - Stage: Stage-4 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan @@ -114,7 +84,7 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 keys: @@ -129,8 +99,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) value expressions: _col1 (type: int), _col2 (type: string) - Local Work: - Map Reduce Local Work Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int), VALUE._col1 (type: string) @@ -142,6 +110,20 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), '1' (type: string) + outputColumnNames: key, key2, value, ds + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(key2, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -158,111 +140,35 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, key2, value + Column Types: int, int, string + Table: default.test_table3 - Stage: Stage-8 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_0:a - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_0:a - TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - - Stage: Stage-5 - Map Reduce - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3, _col4 - Select Operator - expressions: _col0 (type: int), _col3 (type: int), concat(_col1, _col4) (type: string) - outputColumnNames: _col0, 
_col1, _col2 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: int), _col2 (type: string) - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int), VALUE._col1 (type: string) - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table3 - - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3, _col4 - Select Operator - expressions: _col0 (type: int), _col3 (type: int), concat(_col1, _col4) (type: string) - outputColumnNames: _col0, _col1, _col2 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: int), _col2 (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int), VALUE._col1 (type: string) - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table3 + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, b.key, concat(a.value, b.value) @@ -274,9 +180,6 @@ PREHOOK: Input: default@test_table1@ds=1 PREHOOK: Input: default@test_table2 PREHOOK: Input: default@test_table2@ds=1 PREHOOK: Output: default@test_table3@ds=1 -Hive Runtime Error: Map local work failed -FAILED: Execution Error, return code 2 from org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask -ATTEMPT: Execute BackupTask: org.apache.hadoop.hive.ql.exec.mr.MapRedTask POSTHOOK: query: 
INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, b.key, concat(a.value, b.value) FROM test_table1 a JOIN test_table2 b @@ -345,43 +248,13 @@ FROM test_table1 a JOIN test_table2 b ON a.key = b.key WHERE a.ds = '1' and b.ds = '1' POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-6 is a root stage , consists of Stage-7, Stage-8, Stage-1 - Stage-7 has a backup stage: Stage-1 - Stage-4 depends on stages: Stage-7 - Stage-0 depends on stages: Stage-1, Stage-4, Stage-5 - Stage-2 depends on stages: Stage-0 - Stage-8 has a backup stage: Stage-1 - Stage-5 depends on stages: Stage-8 - Stage-1 + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-6 - Conditional Operator - - Stage: Stage-7 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:b - TableScan - alias: b - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - - Stage: Stage-4 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan @@ -394,7 +267,7 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 keys: @@ -409,8 +282,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) value expressions: _col1 (type: int), _col2 (type: string) - Local Work: - Map Reduce Local Work Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int), VALUE._col1 (type: string) @@ -422,6 +293,20 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), '1' (type: string) + outputColumnNames: key, key2, value, ds + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(key2, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -438,111 +323,35 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, key2, value + Column Types: int, int, string + Table: default.test_table3 - Stage: Stage-8 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_0:a - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_0:a - TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - 
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - - Stage: Stage-5 - Map Reduce - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3, _col4 - Select Operator - expressions: _col3 (type: int), _col0 (type: int), concat(_col1, _col4) (type: string) - outputColumnNames: _col0, _col1, _col2 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: int), _col2 (type: string) - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int), VALUE._col1 (type: string) - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table3 - - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3, _col4 - Select Operator - expressions: _col3 (type: int), _col0 (type: int), concat(_col1, _col4) (type: string) - outputColumnNames: _col0, _col1, _col2 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: int), _col2 (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int), VALUE._col1 (type: string) - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table3 + Group By Operator + aggregations: 
compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT b.key, a.key, concat(a.value, b.value) @@ -554,9 +363,6 @@ PREHOOK: Input: default@test_table1@ds=1 PREHOOK: Input: default@test_table2 PREHOOK: Input: default@test_table2@ds=1 PREHOOK: Output: default@test_table3@ds=1 -Hive Runtime Error: Map local work failed -FAILED: Execution Error, return code 2 from org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask -ATTEMPT: Execute BackupTask: org.apache.hadoop.hive.ql.exec.mr.MapRedTask POSTHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT b.key, a.key, concat(a.value, b.value) FROM test_table1 a JOIN test_table2 b diff --git ql/src/test/results/clientpositive/case_sensitivity.q.out ql/src/test/results/clientpositive/case_sensitivity.q.out index c2f2f8b39f..c77994d4cb 100644 --- ql/src/test/results/clientpositive/case_sensitivity.q.out +++ ql/src/test/results/clientpositive/case_sensitivity.q.out @@ -46,6 +46,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 837 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -69,6 +95,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/cast1.q.out ql/src/test/results/clientpositive/cast1.q.out index a0cd142bb9..9c23a76e47 100644 --- ql/src/test/results/clientpositive/cast1.q.out +++ ql/src/test/results/clientpositive/cast1.q.out @@ -44,6 +44,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: int), _col5 (type: string), _col6 (type: int) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 2984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 3080 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 3080 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -67,6 +93,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6, c7 + Column Types: int, double, double, double, int, string, int + Table: default.dest1 Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out index eac491eeb2..3d92a0d4b7 100644 --- ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out +++ ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out @@ -66,11 +66,11 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: state, locid, zip, year - Statistics: Num rows: 8 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: analyze table loc_orc compute statistics for columns state @@ -108,22 +108,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: sq1:loc_orc - Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: state, locid - Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 
Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: state (type: string), locid (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator @@ -131,13 +131,13 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: mergepartial outputColumnNames: state, locid, $f2 - Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(locid) keys: state (type: string), $f2 (type: bigint) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -153,7 +153,7 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) - Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: int) Reduce Operator Tree: Group By Operator @@ -161,10 +161,10 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: bigint) mode: mergepartial outputColumnNames: state, $f2, $f2_0 - Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -739,30 +739,30 @@ STAGE PLANS: Map Operator Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: state (type: string), zip (type: bigint) outputColumnNames: state, zip - Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: state (type: string), zip (type: bigint) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) - Statistics: Num rows: 4 
Data size: 376 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: bigint) mode: mergepartial outputColumnNames: state, zip - Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/cbo_rp_auto_join17.q.out ql/src/test/results/clientpositive/cbo_rp_auto_join17.q.out index d3747afd9a..5fbc878e88 100644 --- ql/src/test/results/clientpositive/cbo_rp_auto_join17.q.out +++ ql/src/test/results/clientpositive/cbo_rp_auto_join17.q.out @@ -15,13 +15,14 @@ FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.*, src2.* POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-0 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: src1 @@ -44,7 +45,7 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -77,6 +78,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: key1, value1, key2, value2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 'hll'), compute_stats(value1, 'hll'), compute_stats(key2, 'hll'), compute_stats(value2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -93,6 +109,36 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key1, value1, key2, value2 + Column Types: int, string, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + 
outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.*, src2.* diff --git ql/src/test/results/clientpositive/cbo_rp_gby2_map_multi_distinct.q.out ql/src/test/results/clientpositive/cbo_rp_gby2_map_multi_distinct.q.out index e5d4421ba7..68628fe345 100644 --- ql/src/test/results/clientpositive/cbo_rp_gby2_map_multi_distinct.q.out +++ ql/src/test/results/clientpositive/cbo_rp_gby2_map_multi_distinct.q.out @@ -21,7 +21,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -65,6 +66,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2192 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -79,6 +95,36 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2192 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2240 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3, 
_col4 + Statistics: Num rows: 1 Data size: 2240 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2240 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 @@ -132,7 +178,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -176,6 +223,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2192 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -190,6 +252,36 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2192 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2240 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2240 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2240 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 diff --git ql/src/test/results/clientpositive/cbo_rp_groupby3_noskew_multi_distinct.q.out ql/src/test/results/clientpositive/cbo_rp_groupby3_noskew_multi_distinct.q.out index 177eefd4ea..bece89f0d5 100644 --- 
ql/src/test/results/clientpositive/cbo_rp_groupby3_noskew_multi_distinct.q.out +++ ql/src/test/results/clientpositive/cbo_rp_groupby3_noskew_multi_distinct.q.out @@ -74,6 +74,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), _col10 (type: double) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll'), compute_stats(c10, 'hll'), compute_stats(c11, 'hll') + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 5280 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct), _col9 (type: struct), _col10 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 5280 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 5280 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -88,6 +108,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11 + Column Types: double, double, double, double, double, double, double, double, double, double, double + Table: default.dest1 PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT diff --git ql/src/test/results/clientpositive/constprog_dp.q.out ql/src/test/results/clientpositive/constprog_dp.q.out index 86b6c326d2..4ed3ebf666 100644 --- ql/src/test/results/clientpositive/constprog_dp.q.out +++ ql/src/test/results/clientpositive/constprog_dp.q.out @@ -43,6 +43,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1000 Data 
size: 10624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -68,6 +102,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/cp_sel.q.out ql/src/test/results/clientpositive/cp_sel.q.out index 910003778a..d467b18140 100644 --- ql/src/test/results/clientpositive/cp_sel.q.out +++ ql/src/test/results/clientpositive/cp_sel.q.out @@ -81,7 +81,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -113,6 +114,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.testpartbucket + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -130,6 +147,39 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.testpartbucket + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, 
_col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: insert overwrite table testpartbucket partition(ds,hr) select key,value,'hello' as ds, 'world' as hr from srcpart where hr=11 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/deleteAnalyze.q.out ql/src/test/results/clientpositive/deleteAnalyze.q.out index 1d051fd15e..783039b509 100644 --- ql/src/test/results/clientpositive/deleteAnalyze.q.out +++ ql/src/test/results/clientpositive/deleteAnalyze.q.out @@ -47,7 +47,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"amount\":\"true\",\"id\":\"true\",\"item\":\"true\",\"sales_tax\":\"true\"}} numFiles 1 numRows 2 rawDataSize 634 @@ -73,17 +73,17 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@testdeci2 col_name amount data_type decimal(10,3) -min -max -num_nulls -distinct_count +min 12.123 +max 123.123 +num_nulls 0 +distinct_count 2 avg_col_len max_col_len num_trues num_falses -bitVector +bitVector HL comment from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"amount\":\"true\",\"id\":\"true\",\"item\":\"true\",\"sales_tax\":\"true\"}} PREHOOK: query: analyze table testdeci2 compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@testdeci2 diff --git ql/src/test/results/clientpositive/dynamic_partition_skip_default.q.out ql/src/test/results/clientpositive/dynamic_partition_skip_default.q.out index d199574b29..b4efaf4511 100644 --- ql/src/test/results/clientpositive/dynamic_partition_skip_default.q.out +++ ql/src/test/results/clientpositive/dynamic_partition_skip_default.q.out @@ -61,7 +61,7 @@ STAGE PLANS: partcol1 1 partcol2 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"intcol":"true"}} bucket_count -1 column.name.delimiter , columns intcol @@ -130,7 +130,7 @@ STAGE PLANS: partcol1 1 partcol2 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"intcol":"true"}} bucket_count -1 column.name.delimiter , columns intcol @@ -199,7 +199,7 @@ STAGE PLANS: partcol1 1 partcol2 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"intcol":"true"}} bucket_count -1 column.name.delimiter , columns intcol @@ -245,7 +245,7 @@ STAGE PLANS: partcol1 1 partcol2 __HIVE_DEFAULT_PARTITION__ properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"intcol":"true"}} bucket_count -1 column.name.delimiter , columns intcol diff --git ql/src/test/results/clientpositive/dynpart_sort_optimization_acid2.q.out 
ql/src/test/results/clientpositive/dynpart_sort_optimization_acid2.q.out index 8199235519..884e63c44d 100644 --- ql/src/test/results/clientpositive/dynpart_sort_optimization_acid2.q.out +++ ql/src/test/results/clientpositive/dynpart_sort_optimization_acid2.q.out @@ -71,4 +71,8 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.non_acid diff --git ql/src/test/results/clientpositive/encrypted/encryption_join_unencrypted_tbl.q.out ql/src/test/results/clientpositive/encrypted/encryption_join_unencrypted_tbl.q.out index f1654d425b..20e753e0c9 100644 --- ql/src/test/results/clientpositive/encrypted/encryption_join_unencrypted_tbl.q.out +++ ql/src/test/results/clientpositive/encrypted/encryption_join_unencrypted_tbl.q.out @@ -595,7 +595,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -616,7 +616,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git ql/src/test/results/clientpositive/explain_ddl.q.out ql/src/test/results/clientpositive/explain_ddl.q.out index f0e54c5153..d985d52e68 100644 --- ql/src/test/results/clientpositive/explain_ddl.q.out +++ ql/src/test/results/clientpositive/explain_ddl.q.out @@ -523,6 +523,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.m1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -546,6 +572,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.m1 Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/extrapolate_part_stats_full.q.out 
ql/src/test/results/clientpositive/extrapolate_part_stats_full.q.out index 25f5372426..deda5a0b04 100644 --- ql/src/test/results/clientpositive/extrapolate_part_stats_full.q.out +++ ql/src/test/results/clientpositive/extrapolate_part_stats_full.q.out @@ -119,7 +119,7 @@ STAGE PLANS: partition values: year 2000 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -164,7 +164,7 @@ STAGE PLANS: partition values: year 2001 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -232,7 +232,7 @@ STAGE PLANS: partition values: year 2000 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -277,7 +277,7 @@ STAGE PLANS: partition values: year 2001 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip diff --git ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out index 5f74166e89..a5d172479a 100644 --- ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out +++ ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out @@ -145,7 +145,7 @@ STAGE PLANS: partition values: year 2000 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -190,7 +190,7 @@ STAGE PLANS: partition values: year 2001 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -235,7 +235,7 @@ STAGE PLANS: partition values: year 2002 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -280,7 +280,7 @@ STAGE PLANS: partition values: year 2003 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -322,12 +322,12 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc_1d - Statistics: Num rows: 20 Data size: 1700 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator expressions: state (type: string) outputColumnNames: _col0 - 
Statistics: Num rows: 20 Data size: 1700 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain extended select state,locid from loc_orc_1d @@ -348,7 +348,7 @@ STAGE PLANS: partition values: year 2000 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -393,7 +393,7 @@ STAGE PLANS: partition values: year 2001 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -438,7 +438,7 @@ STAGE PLANS: partition values: year 2002 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -483,7 +483,7 @@ STAGE PLANS: partition values: year 2003 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -525,12 +525,12 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc_1d - Statistics: Num rows: 20 Data size: 1780 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 1780 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: analyze table loc_orc_1d partition(year='2000') compute statistics for columns state @@ -579,7 +579,7 @@ STAGE PLANS: partition values: year 2000 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -624,7 +624,7 @@ STAGE PLANS: partition values: year 2001 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -669,7 +669,7 @@ STAGE PLANS: partition values: year 2002 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -714,7 +714,7 @@ STAGE PLANS: partition values: year 2003 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -782,7 +782,7 @@ STAGE PLANS: partition values: year 2000 
properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -827,7 +827,7 @@ STAGE PLANS: partition values: year 2001 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -872,7 +872,7 @@ STAGE PLANS: partition values: year 2002 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -917,7 +917,7 @@ STAGE PLANS: partition values: year 2003 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -959,12 +959,12 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc_1d - Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: create table if not exists loc_orc_2d ( @@ -1068,7 +1068,7 @@ STAGE PLANS: year 2001 zip 43201 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1114,7 +1114,7 @@ STAGE PLANS: year 2002 zip 43201 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1160,7 +1160,7 @@ STAGE PLANS: year 2003 zip 43201 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1206,7 +1206,7 @@ STAGE PLANS: year 2000 zip 94086 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1298,7 +1298,7 @@ STAGE PLANS: year 2002 zip 94086 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1344,7 +1344,7 @@ STAGE PLANS: year 2003 zip 94086 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1390,7 +1390,7 @@ STAGE PLANS: year 2000 zip 94087 properties: - 
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1436,7 +1436,7 @@ STAGE PLANS: year 2001 zip 94087 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1528,7 +1528,7 @@ STAGE PLANS: year 2003 zip 94087 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1570,12 +1570,12 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc_2d - Statistics: Num rows: 20 Data size: 1760 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator expressions: state (type: string) outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 1760 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain extended select state,locid from loc_orc_2d @@ -1597,7 +1597,7 @@ STAGE PLANS: year 2001 zip 43201 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1643,7 +1643,7 @@ STAGE PLANS: year 2002 zip 43201 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1689,7 +1689,7 @@ STAGE PLANS: year 2003 zip 43201 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1735,7 +1735,7 @@ STAGE PLANS: year 2000 zip 94086 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1827,7 +1827,7 @@ STAGE PLANS: year 2002 zip 94086 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1873,7 +1873,7 @@ STAGE PLANS: year 2003 zip 94086 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1919,7 +1919,7 @@ STAGE PLANS: year 2000 zip 94087 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1965,7 +1965,7 @@ STAGE PLANS: year 2001 zip 94087 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -2057,7 +2057,7 @@ STAGE PLANS: year 2003 zip 94087 properties: - 
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -2099,11 +2099,11 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc_2d - Statistics: Num rows: 20 Data size: 1840 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 1840 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE ListSink diff --git ql/src/test/results/clientpositive/filter_join_breaktask.q.out ql/src/test/results/clientpositive/filter_join_breaktask.q.out index 9d00ce6786..884bfcdacc 100644 --- ql/src/test/results/clientpositive/filter_join_breaktask.q.out +++ ql/src/test/results/clientpositive/filter_join_breaktask.q.out @@ -38,22 +38,22 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: g + alias: f Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: (value <> '') (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) + expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: _col0 (type: int) null sort order: a sort order: + - Map-reduce partition columns: _col0 (type: string) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE tag: 0 auto parallelism: false @@ -70,13 +70,13 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: int) null sort order: a sort order: + - Map-reduce partition columns: _col1 (type: string) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE tag: 1 - value expressions: _col0 (type: int) + value expressions: _col1 (type: string) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -89,7 +89,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -129,16 +129,16 @@ STAGE PLANS: name: default.filter_join_breaktask name: default.filter_join_breaktask Truncated Path -> Alias: - /filter_join_breaktask/ds=2008-04-08 [$hdt$_0:g, $hdt$_1:m] + /filter_join_breaktask/ds=2008-04-08 [$hdt$_1:f, $hdt$_2:m] Needs Tagging: true Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col2 + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col3 Statistics: Num rows: 27 Data size: 232 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -150,8 +150,8 
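Taken together, the loc_orc_1d / loc_orc_2d hunks above all move the same way: every column written by the insert is now recorded under COLUMN_STATS in COLUMN_STATS_ACCURATE, and the planner's Column stats flag moves from PARTIAL to COMPLETE; the reshuffled aliases and join keys in filter_join_breaktask look like the same change surfacing through join planning. A minimal sketch of verifying this on a running warehouse, assuming the loc_orc_1d table and year partition from the test (loc_staging is a hypothetical source table):

SET hive.stats.column.autogather=true;            -- the new default from this patch
INSERT OVERWRITE TABLE loc_orc_1d PARTITION (year='2001')
SELECT state, locid, zip FROM loc_staging WHERE year = '2001';
DESCRIBE FORMATTED loc_orc_1d PARTITION (year='2001');
-- Partition Parameters should now carry, per the hunks above:
--   COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}}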
@@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: column.name.delimiter , - columns _col0,_col2 - columns.types string,int + columns _col0,_col3 + columns.types int,string escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -165,31 +165,31 @@ STAGE PLANS: TableScan GatherStats: false Reduce Output Operator - key expressions: _col2 (type: int) + key expressions: _col3 (type: string) null sort order: a sort order: + - Map-reduce partition columns: _col2 (type: int) + Map-reduce partition columns: _col3 (type: string) Statistics: Num rows: 27 Data size: 232 Basic stats: COMPLETE Column stats: NONE tag: 0 - value expressions: _col0 (type: string) + value expressions: _col0 (type: int) auto parallelism: false TableScan - alias: f + alias: g Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) + predicate: (value <> '') (type: boolean) Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int) + expressions: value (type: string) outputColumnNames: _col0 Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col0 (type: string) null sort order: a sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE tag: 1 auto parallelism: false @@ -203,8 +203,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: column.name.delimiter , - columns _col0,_col2 - columns.types string,int + columns _col0,_col3 + columns.types int,string escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -213,8 +213,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: column.name.delimiter , - columns _col0,_col2 - columns.types string,int + columns _col0,_col3 + columns.types int,string escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -226,7 +226,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -266,7 +266,7 @@ STAGE PLANS: name: default.filter_join_breaktask name: default.filter_join_breaktask Truncated Path -> Alias: - /filter_join_breaktask/ds=2008-04-08 [$hdt$_2:f] + /filter_join_breaktask/ds=2008-04-08 [$hdt$_0:g] #### A masked pattern was here #### Needs Tagging: true Reduce Operator Tree: @@ -274,12 +274,12 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) - 1 _col0 (type: int) + 0 _col3 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0, _col5 Statistics: Num rows: 29 Data size: 255 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col5 (type: int), _col0 (type: string) + expressions: _col0 (type: int), _col5 (type: string) outputColumnNames: _col0, 
_col1 Statistics: Num rows: 29 Data size: 255 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git ql/src/test/results/clientpositive/groupby10.q.out ql/src/test/results/clientpositive/groupby10.q.out index 9f90a2ca3c..79e4fd2cb8 100644 --- ql/src/test/results/clientpositive/groupby10.q.out +++ ql/src/test/results/clientpositive/groupby10.q.out @@ -44,11 +44,15 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-0 - Stage-5 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-0, Stage-6, Stage-11 + Stage-5 depends on stages: Stage-3 Stage-6 depends on stages: Stage-5 - Stage-1 depends on stages: Stage-6 - Stage-7 depends on stages: Stage-1 + Stage-9 depends on stages: Stage-1, Stage-6, Stage-11 + Stage-7 depends on stages: Stage-2 + Stage-8 depends on stages: Stage-7 + Stage-1 depends on stages: Stage-8 + Stage-10 depends on stages: Stage-8 + Stage-11 depends on stages: Stage-10 STAGE PLANS: Stage: Stage-2 @@ -119,6 +123,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 1 Data size: 280 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -133,12 +147,68 @@ STAGE PLANS: Stage: Stage-4 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, int, int + Table: default.dest1 Stage: Stage-5 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 1 Data size: 280 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), val1 (type: int), val2 (type: int) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') + mode: partial1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
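The groupby10 plan that begins here shows the pattern behind most of the golden-file changes in this patch: each INSERT OVERWRITE target gains a Select Operator over the written rows feeding compute_stats(..., 'hll') (the 'hll' argument picks HyperLogLog-based NDV estimation), a second aggregation stage that merges the partial stats, and a Column Stats Desc entry under Stats Work. The pre-patch plans are still reachable per session; a hedged sketch using the dest1 half of the test query (the .q file pairs it with a second insert into dest2, which is why two stats pipelines appear in the plan):

SET hive.stats.column.autogather=false;           -- revert to the old default
EXPLAIN
FROM INPUT
INSERT OVERWRITE TABLE dest1
SELECT INPUT.key, count(substr(INPUT.value,5)), count(DISTINCT substr(INPUT.value,5))
GROUP BY INPUT.key;
-- Expect no compute_stats stages and a Stats Work section with Basic Stats Work only.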
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-9 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, int, int + Table: default.dest2 + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: key (type: int), substr(value, 5) (type: string) sort order: ++ Map-reduce partition columns: key (type: int) @@ -157,7 +227,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-6 + Stage: Stage-8 Map Reduce Map Operator Tree: TableScan @@ -186,6 +256,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 1 Data size: 280 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -197,9 +277,49 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-7 - Stats Work - Basic Stats Work: + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 1 Data size: 280 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), val1 (type: int), val2 (type: int) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') + mode: partial1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM INPUT INSERT OVERWRITE TABLE dest1 SELECT INPUT.key, count(substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key @@ -291,11 +411,15 @@ STAGE DEPENDENCIES: Stage-2 is 
a root stage Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-0 - Stage-5 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-0, Stage-6, Stage-11 + Stage-5 depends on stages: Stage-3 Stage-6 depends on stages: Stage-5 - Stage-1 depends on stages: Stage-6 - Stage-7 depends on stages: Stage-1 + Stage-9 depends on stages: Stage-1, Stage-6, Stage-11 + Stage-7 depends on stages: Stage-2 + Stage-8 depends on stages: Stage-7 + Stage-1 depends on stages: Stage-8 + Stage-10 depends on stages: Stage-8 + Stage-11 depends on stages: Stage-10 STAGE PLANS: Stage: Stage-2 @@ -366,6 +490,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 1 Data size: 280 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -380,12 +514,68 @@ STAGE PLANS: Stage: Stage-4 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, int, int + Table: default.dest1 Stage: Stage-5 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 1 Data size: 280 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), val1 (type: int), val2 (type: int) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') + mode: partial1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-9 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, int, int + Table: default.dest2 + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: key (type: int), substr(value, 5) (type: string) 
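Functionally, each generated pipeline (Stage-5/Stage-6 for dest1, Stage-10/Stage-11 for dest2) computes what an explicit column-stats scan would, minus the extra read of the target table. A rough manual equivalent, assuming autogather is switched off and using the test's table names:

SET hive.stats.column.autogather=false;
FROM INPUT
INSERT OVERWRITE TABLE dest1
SELECT INPUT.key, count(substr(INPUT.value,5)), count(DISTINCT substr(INPUT.value,5))
GROUP BY INPUT.key;
ANALYZE TABLE dest1 COMPUTE STATISTICS FOR COLUMNS;  -- fills the same metastore column stats

The trade-off the patch makes is to fold that second scan into the insert itself, which is why the stage DAGs above grow instead of a separate ANALYZE run being required.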
sort order: ++ Map-reduce partition columns: key (type: int) @@ -404,7 +594,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-6 + Stage: Stage-8 Map Reduce Map Operator Tree: TableScan @@ -433,6 +623,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 1 Data size: 280 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -444,9 +644,49 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-7 - Stats Work - Basic Stats Work: + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 1 Data size: 280 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), val1 (type: int), val2 (type: int) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') + mode: partial1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM INPUT INSERT OVERWRITE TABLE dest1 SELECT INPUT.key, count(substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key @@ -537,9 +777,11 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4, Stage-6 + Stage-4 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-1, Stage-4, Stage-6 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -578,6 +820,16 @@ STAGE PLANS: 
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 1 Data size: 280 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator aggregations: sum(DISTINCT KEY._col1:0._col0), avg(DISTINCT KEY._col1:1._col0) keys: KEY._col0 (type: int) @@ -596,6 +848,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 1 Data size: 280 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -610,6 +872,40 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, int, int + Table: default.dest1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 280 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), val1 (type: int), val2 (type: int) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, int, int + Table: default.dest2 Stage: Stage-1 Move Operator @@ -621,9 +917,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-4 - Stats Work - Basic Stats Work: + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 280 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), val1 (type: int), val2 (type: int) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input 
format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM INPUT INSERT OVERWRITE TABLE dest1 SELECT INPUT.key, sum(distinct substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key diff --git ql/src/test/results/clientpositive/groupby11.q.out ql/src/test/results/clientpositive/groupby11.q.out index 2ab4c39c89..592be1df85 100644 --- ql/src/test/results/clientpositive/groupby11.q.out +++ ql/src/test/results/clientpositive/groupby11.q.out @@ -32,11 +32,15 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-0 - Stage-5 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-0, Stage-6, Stage-11 + Stage-5 depends on stages: Stage-3 Stage-6 depends on stages: Stage-5 - Stage-1 depends on stages: Stage-6 - Stage-7 depends on stages: Stage-1 + Stage-9 depends on stages: Stage-1, Stage-6, Stage-11 + Stage-7 depends on stages: Stage-2 + Stage-8 depends on stages: Stage-7 + Stage-1 depends on stages: Stage-8 + Stage-10 depends on stages: Stage-8 + Stage-11 depends on stages: Stage-10 STAGE PLANS: Stage: Stage-2 @@ -107,6 +111,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -123,12 +137,77 @@ STAGE PLANS: Stage: Stage-4 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, val1, val2 + Column Types: string, int, int + Table: default.dest1 Stage: Stage-5 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + key expressions: '111' (type: string) + sort order: + + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), val1 (type: int), val2 (type: int) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') + keys: '111' (type: string) + mode: partial1 + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: '111' (type: string) + sort order: + + Map-reduce partition columns: '111' (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: 
compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: '111' (type: string) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), '111' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-9 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val1, val2 + Column Types: string, int, int + Table: default.dest2 + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: substr(value, 5) (type: string), key (type: string) sort order: ++ Map-reduce partition columns: substr(value, 5) (type: string) @@ -147,7 +226,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-6 + Stage: Stage-8 Map Reduce Map Operator Tree: TableScan @@ -176,6 +255,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -189,9 +278,58 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-7 - Stats Work - Basic Stats Work: + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: '111' (type: string) + sort order: + + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), val1 (type: int), val2 (type: int) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') + keys: '111' (type: string) + mode: partial1 + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: '111' (type: string) + sort order: + + Map-reduce partition columns: '111' (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: '111' (type: string) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), '111' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 partition(ds='111') diff --git ql/src/test/results/clientpositive/groupby12.q.out ql/src/test/results/clientpositive/groupby12.q.out index 2f633f01a7..cd0b7fbfb8 100644 --- ql/src/test/results/clientpositive/groupby12.q.out +++ ql/src/test/results/clientpositive/groupby12.q.out @@ -17,7 +17,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -54,6 +55,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -68,6 +79,32 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT COUNT(src.key), COUNT(DISTINCT value) GROUP BY src.key diff --git ql/src/test/results/clientpositive/groupby1_map.q.out ql/src/test/results/clientpositive/groupby1_map.q.out index 
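Once any of these inserts has run under the new default, the per-column statistics land in the metastore and can be read back without scanning data; a small sketch against the dest1 (key int, value string) table that groupby12 above writes:

DESCRIBE FORMATTED dest1 key;
-- With stats gathered, the column row shows min, max, num_nulls, distinct_count
-- and similar fields instead of blanks; distinct_count comes from the HLL sketch.
DESCRIBE FORMATTED dest1 value;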
337c2e0fe1..82fe1f0d93 100644 --- ql/src/test/results/clientpositive/groupby1_map.q.out +++ ql/src/test/results/clientpositive/groupby1_map.q.out @@ -15,7 +15,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -59,6 +60,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: double) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -73,6 +89,32 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, double + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/groupby1_map_nomap.q.out ql/src/test/results/clientpositive/groupby1_map_nomap.q.out index 337c2e0fe1..82fe1f0d93 100644 --- ql/src/test/results/clientpositive/groupby1_map_nomap.q.out +++ ql/src/test/results/clientpositive/groupby1_map_nomap.q.out @@ -15,7 +15,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -59,6 +60,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: double) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic 
stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -73,6 +89,32 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, double + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/groupby1_map_skew.q.out ql/src/test/results/clientpositive/groupby1_map_skew.q.out index a140a0211f..44b4db70c8 100644 --- ql/src/test/results/clientpositive/groupby1_map_skew.q.out +++ ql/src/test/results/clientpositive/groupby1_map_skew.q.out @@ -16,7 +16,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -84,6 +85,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: double) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -98,6 +114,32 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, double + Table: default.dest1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: 
COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/groupby1_noskew.q.out ql/src/test/results/clientpositive/groupby1_noskew.q.out index 702ea3e701..ba4366dff8 100644 --- ql/src/test/results/clientpositive/groupby1_noskew.q.out +++ ql/src/test/results/clientpositive/groupby1_noskew.q.out @@ -15,7 +15,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -53,6 +54,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g1 + Select Operator + expressions: _col0 (type: int), _col1 (type: double) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -67,6 +78,32 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, double + Table: default.dest_g1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/groupby2_map.q.out ql/src/test/results/clientpositive/groupby2_map.q.out index 427590d2ba..262c5d8f75 100644 --- ql/src/test/results/clientpositive/groupby2_map.q.out +++ ql/src/test/results/clientpositive/groupby2_map.q.out @@ -17,7 +17,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -61,6 +62,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: 
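The _skew variants differ from their plain counterparts only in how the stats aggregation is parallelized: in groupby1_map_skew above the merging Group By runs in final mode (and the multi-insert groupby10/groupby11 plans additionally shuffle the map-side partials on rand() in partial1 mode), whereas the non-skew files pair hash with mergepartial. A hedged sketch of reproducing the skewed shape, with the insert taken verbatim from the test:

SET hive.groupby.skewindata=true;                 -- what the *_skew tests run with
EXPLAIN
FROM src
INSERT OVERWRITE TABLE dest1
SELECT src.key, sum(substr(src.value,5))
GROUP BY src.key;
-- Expect the compute_stats merge stage in 'final' mode, as in Stage-4 above.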
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + outputColumnNames: key, c1, c2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -75,6 +91,32 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, c1, c2 + Column Types: string, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) diff --git ql/src/test/results/clientpositive/groupby2_map_multi_distinct.q.out ql/src/test/results/clientpositive/groupby2_map_multi_distinct.q.out index 0ab198501d..581e958cd8 100644 --- ql/src/test/results/clientpositive/groupby2_map_multi_distinct.q.out +++ ql/src/test/results/clientpositive/groupby2_map_multi_distinct.q.out @@ -17,7 +17,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -61,6 +62,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2160 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -75,6 +91,32 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2160 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) @@ -120,7 +162,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -164,6 +207,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2160 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -178,6 +236,32 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2160 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By 
Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.key,1,1)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) diff --git ql/src/test/results/clientpositive/groupby2_map_skew.q.out ql/src/test/results/clientpositive/groupby2_map_skew.q.out index bb5a09f4b3..02f5b47460 100644 --- ql/src/test/results/clientpositive/groupby2_map_skew.q.out +++ ql/src/test/results/clientpositive/groupby2_map_skew.q.out @@ -18,7 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -86,6 +87,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + outputColumnNames: key, c1, c2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -100,6 +116,32 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, c1, c2 + Column Types: string, int, string + Table: default.dest1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT 
substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) diff --git ql/src/test/results/clientpositive/groupby2_noskew.q.out ql/src/test/results/clientpositive/groupby2_noskew.q.out index 782a1a2a75..29a71f1710 100644 --- ql/src/test/results/clientpositive/groupby2_noskew.q.out +++ ql/src/test/results/clientpositive/groupby2_noskew.q.out @@ -17,7 +17,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -54,6 +55,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + outputColumnNames: key, c1, c2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -68,6 +79,32 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, c1, c2 + Column Types: string, int, string + Table: default.dest_g2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), c1 (type: int), c2 (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) diff --git ql/src/test/results/clientpositive/groupby2_noskew_multi_distinct.q.out ql/src/test/results/clientpositive/groupby2_noskew_multi_distinct.q.out index 9a070aca3f..f1ce8388d7 100644 --- ql/src/test/results/clientpositive/groupby2_noskew_multi_distinct.q.out +++ ql/src/test/results/clientpositive/groupby2_noskew_multi_distinct.q.out @@ -17,7 +17,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -55,6 +56,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: 
string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -69,6 +80,32 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest_g2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), c1 (type: int), c2 (type: string), c3 (type: int), c4 (type: int) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll'), compute_stats(VALUE._col4, 'hll'), compute_stats(VALUE._col5, 'hll') + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) diff --git ql/src/test/results/clientpositive/groupby3.q.out ql/src/test/results/clientpositive/groupby3.q.out index 2f2b533d8a..7c97174830 100644 --- ql/src/test/results/clientpositive/groupby3.q.out +++ ql/src/test/results/clientpositive/groupby3.q.out @@ -36,7 +36,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -73,6 +74,7 @@ STAGE PLANS: TableScan Reduce Output Operator sort order: + Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 1 Data size: 1208 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: double), _col1 (type: struct), _col2 (type: struct), _col3 (type: string), _col4 (type: string), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) Reduce Operator Tree: @@ -93,6 +95,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Statistics: Num rows: 1 Data size: 1208 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 
'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll') + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 3984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -107,6 +124,32 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Column Types: double, double, double, double, double, double, double, double, double + Table: default.dest1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 3984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6), compute_stats(VALUE._col7), compute_stats(VALUE._col8) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 4128 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4128 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT diff --git ql/src/test/results/clientpositive/groupby3_map.q.out ql/src/test/results/clientpositive/groupby3_map.q.out index 1bbf23dd5b..edad22b93f 100644 --- ql/src/test/results/clientpositive/groupby3_map.q.out +++ ql/src/test/results/clientpositive/groupby3_map.q.out @@ -77,6 +77,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Statistics: Num rows: 1 Data size: 1216 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll') + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 4064 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: 
struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 4064 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4064 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -91,6 +111,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Column Types: double, double, double, double, double, double, double, double, double + Table: default.dest1 PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT diff --git ql/src/test/results/clientpositive/groupby3_map_multi_distinct.q.out ql/src/test/results/clientpositive/groupby3_map_multi_distinct.q.out index 248a31746c..20344640e6 100644 --- ql/src/test/results/clientpositive/groupby3_map_multi_distinct.q.out +++ ql/src/test/results/clientpositive/groupby3_map_multi_distinct.q.out @@ -81,6 +81,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), _col10 (type: double) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11 + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll'), compute_stats(c10, 'hll'), compute_stats(c11, 'hll') + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 4840 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct), _col9 (type: struct), _col10 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 4840 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4840 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -95,6 +115,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11 + Column Types: double, double, double, double, double, double, double, double, double, double, double + Table: 
default.dest1 PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT diff --git ql/src/test/results/clientpositive/groupby3_map_skew.q.out ql/src/test/results/clientpositive/groupby3_map_skew.q.out index 8ba3a5865a..e53e62c2ce 100644 --- ql/src/test/results/clientpositive/groupby3_map_skew.q.out +++ ql/src/test/results/clientpositive/groupby3_map_skew.q.out @@ -100,6 +100,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll') + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 4064 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 4064 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4064 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -114,6 +134,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Column Types: double, double, double, double, double, double, double, double, double + Table: default.dest1 PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT diff --git ql/src/test/results/clientpositive/groupby3_noskew.q.out ql/src/test/results/clientpositive/groupby3_noskew.q.out index 5c520bbdd1..1aa4cb6ce6 100644 --- ql/src/test/results/clientpositive/groupby3_noskew.q.out +++ ql/src/test/results/clientpositive/groupby3_noskew.q.out @@ -70,6 +70,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll') + 
mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 4128 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4128 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -84,6 +100,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Column Types: double, double, double, double, double, double, double, double, double + Table: default.dest1 PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT diff --git ql/src/test/results/clientpositive/groupby3_noskew_multi_distinct.q.out ql/src/test/results/clientpositive/groupby3_noskew_multi_distinct.q.out index c5d121b4f9..bb964e66d6 100644 --- ql/src/test/results/clientpositive/groupby3_noskew_multi_distinct.q.out +++ ql/src/test/results/clientpositive/groupby3_noskew_multi_distinct.q.out @@ -74,6 +74,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), _col10 (type: double) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll'), compute_stats(c10, 'hll'), compute_stats(c11, 'hll') + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 5008 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 5008 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -88,6 +104,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11 + Column Types: double, double, double, double, double, double, double, double, double, double, double + Table: default.dest1 PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT diff --git ql/src/test/results/clientpositive/groupby4.q.out ql/src/test/results/clientpositive/groupby4.q.out index e8e7b88412..34f2099792 100644 --- ql/src/test/results/clientpositive/groupby4.q.out +++ ql/src/test/results/clientpositive/groupby4.q.out @@ -18,7 +18,9 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-5 + Stage-4 depends on stages: Stage-2 + Stage-5 
depends on stages: Stage-4 STAGE PLANS: Stage: Stage-1 @@ -72,6 +74,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: c1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -86,6 +98,54 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: c1 + Column Types: string + Table: default.dest1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: c1 (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll') + mode: partial1 + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: final + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1) GROUP BY substr(src.key,1,1) diff --git ql/src/test/results/clientpositive/groupby4_map.q.out ql/src/test/results/clientpositive/groupby4_map.q.out index 647242aaa4..4ed785f0fa 100644 --- ql/src/test/results/clientpositive/groupby4_map.q.out +++ ql/src/test/results/clientpositive/groupby4_map.q.out @@ -53,6 +53,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll') + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 424 Basic 
stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -67,6 +87,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key + Column Types: int + Table: default.dest1 PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT count(1) PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/groupby4_map_skew.q.out ql/src/test/results/clientpositive/groupby4_map_skew.q.out index 0857f22bbc..e6ae7e8c23 100644 --- ql/src/test/results/clientpositive/groupby4_map_skew.q.out +++ ql/src/test/results/clientpositive/groupby4_map_skew.q.out @@ -53,6 +53,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll') + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -67,6 +87,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key + Column Types: int + Table: default.dest1 PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT count(1) PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/groupby4_noskew.q.out ql/src/test/results/clientpositive/groupby4_noskew.q.out index 5827ef06ba..0fa7046230 100644 --- ql/src/test/results/clientpositive/groupby4_noskew.q.out +++ ql/src/test/results/clientpositive/groupby4_noskew.q.out @@ -17,7 +17,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -49,6 +50,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: c1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -63,6 +74,32 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: c1 + Column Types: string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output 
Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: c1 (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll') + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1) GROUP BY substr(src.key,1,1) diff --git ql/src/test/results/clientpositive/groupby5.q.out ql/src/test/results/clientpositive/groupby5.q.out index 995d30e9a1..1677ab3a02 100644 --- ql/src/test/results/clientpositive/groupby5.q.out +++ ql/src/test/results/clientpositive/groupby5.q.out @@ -22,7 +22,9 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-5 + Stage-4 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-4 STAGE PLANS: Stage: Stage-1 @@ -84,6 +86,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -98,6 +110,54 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') + mode: partial1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator 
+ compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) diff --git ql/src/test/results/clientpositive/groupby5_map.q.out ql/src/test/results/clientpositive/groupby5_map.q.out index 393c4e2d54..d04eb44b3c 100644 --- ql/src/test/results/clientpositive/groupby5_map.q.out +++ ql/src/test/results/clientpositive/groupby5_map.q.out @@ -55,6 +55,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll') + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -69,6 +89,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key + Column Types: int + Table: default.dest1 PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT sum(src.key) PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/groupby5_map_skew.q.out ql/src/test/results/clientpositive/groupby5_map_skew.q.out index 67767f5153..6fa3fc1188 100644 --- ql/src/test/results/clientpositive/groupby5_map_skew.q.out +++ ql/src/test/results/clientpositive/groupby5_map_skew.q.out @@ -55,6 +55,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll') + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -69,6 +89,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key + Column Types: int + Table: default.dest1 PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT sum(src.key) PREHOOK: 
type: QUERY diff --git ql/src/test/results/clientpositive/groupby5_noskew.q.out ql/src/test/results/clientpositive/groupby5_noskew.q.out index dfecdb5cef..9cd1cccdcf 100644 --- ql/src/test/results/clientpositive/groupby5_noskew.q.out +++ ql/src/test/results/clientpositive/groupby5_noskew.q.out @@ -21,7 +21,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -59,6 +60,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -73,6 +84,32 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) diff --git ql/src/test/results/clientpositive/groupby6.q.out ql/src/test/results/clientpositive/groupby6.q.out index afc23ecf9f..a8cd73852f 100644 --- ql/src/test/results/clientpositive/groupby6.q.out +++ ql/src/test/results/clientpositive/groupby6.q.out @@ -18,7 +18,9 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-5 + Stage-4 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-4 STAGE PLANS: Stage: Stage-1 @@ -72,6 +74,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: c1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -86,6 +98,54 @@ STAGE 
PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: c1 + Column Types: string + Table: default.dest1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: c1 (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll') + mode: partial1 + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: final + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(src.value,5,1) diff --git ql/src/test/results/clientpositive/groupby6_map.q.out ql/src/test/results/clientpositive/groupby6_map.q.out index c0c8841adc..1d89c1a7fc 100644 --- ql/src/test/results/clientpositive/groupby6_map.q.out +++ ql/src/test/results/clientpositive/groupby6_map.q.out @@ -17,7 +17,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -54,6 +55,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: c1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll') + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -68,6 +84,32 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: c1 + Column Types: string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + 
aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(src.value,5,1) diff --git ql/src/test/results/clientpositive/groupby6_map_skew.q.out ql/src/test/results/clientpositive/groupby6_map_skew.q.out index 5bb8695de6..bc7ee51593 100644 --- ql/src/test/results/clientpositive/groupby6_map_skew.q.out +++ ql/src/test/results/clientpositive/groupby6_map_skew.q.out @@ -18,7 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -77,6 +78,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: c1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll') + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -91,6 +107,32 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: c1 + Column Types: string + Table: default.dest1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: final + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(src.value,5,1) diff --git ql/src/test/results/clientpositive/groupby6_noskew.q.out ql/src/test/results/clientpositive/groupby6_noskew.q.out index 0cafbd1256..bf818a0935 100644 --- ql/src/test/results/clientpositive/groupby6_noskew.q.out +++ ql/src/test/results/clientpositive/groupby6_noskew.q.out @@ -17,7 +17,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: 
Stage-1 @@ -49,6 +50,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: c1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -63,6 +74,32 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: c1 + Column Types: string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: c1 (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll') + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(src.value,5,1) diff --git ql/src/test/results/clientpositive/groupby7_map.q.out ql/src/test/results/clientpositive/groupby7_map.q.out index b7b2c8ce5e..76d1aa4f9b 100644 --- ql/src/test/results/clientpositive/groupby7_map.q.out +++ ql/src/test/results/clientpositive/groupby7_map.q.out @@ -27,10 +27,12 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1, Stage-4, Stage-7 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -90,6 +92,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -104,12 +121,46 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 Stage: Stage-4 Map Reduce Map Operator 
Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) @@ -134,6 +185,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -145,9 +211,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-5 - Stats Work - Basic Stats Work: + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key diff --git ql/src/test/results/clientpositive/groupby7_map_multi_single_reducer.q.out ql/src/test/results/clientpositive/groupby7_map_multi_single_reducer.q.out index 6f88027bc4..4c477974b4 100644 --- ql/src/test/results/clientpositive/groupby7_map_multi_single_reducer.q.out +++ ql/src/test/results/clientpositive/groupby7_map_multi_single_reducer.q.out @@ -27,9 +27,11 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage 
Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4, Stage-6 + Stage-4 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-1, Stage-4, Stage-6 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -69,6 +71,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string) @@ -87,6 +104,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -101,6 +133,40 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 Stage: Stage-1 Move Operator @@ -112,9 +178,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-4 - Stats Work - Basic Stats Work: + Stage: Stage-6 + Map Reduce + Map 
Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: true
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 PREHOOK: query: FROM SRC
 INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key
diff --git ql/src/test/results/clientpositive/groupby7_map_skew.q.out ql/src/test/results/clientpositive/groupby7_map_skew.q.out
index 17eb283633..a98511a002 100644
--- ql/src/test/results/clientpositive/groupby7_map_skew.q.out
+++ ql/src/test/results/clientpositive/groupby7_map_skew.q.out
@@ -28,11 +28,13 @@ STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-3 depends on stages: Stage-2
   Stage-0 depends on stages: Stage-3
-  Stage-4 depends on stages: Stage-0
-  Stage-5 depends on stages: Stage-2
-  Stage-6 depends on stages: Stage-5
-  Stage-1 depends on stages: Stage-6
-  Stage-7 depends on stages: Stage-1
+  Stage-4 depends on stages: Stage-0, Stage-5, Stage-9
+  Stage-5 depends on stages: Stage-3
+  Stage-8 depends on stages: Stage-1, Stage-5, Stage-9
+  Stage-6 depends on stages: Stage-2
+  Stage-7 depends on stages: Stage-6
+  Stage-1 depends on stages: Stage-7
+  Stage-9 depends on stages: Stage-7
 
 STAGE PLANS:
   Stage: Stage-2
@@ -116,6 +118,21 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 name: default.dest1
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: string)
+            outputColumnNames: key, value
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: true
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -130,12 +147,46 @@ STAGE PLANS:
   Stage: Stage-4
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest1
 
   Stage: Stage-5
     Map Reduce
       Map Operator Tree:
           TableScan
             Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: final
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: true
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-8
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest2
+
+  Stage: Stage-6
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
               key expressions: _col0 (type: string)
               sort order: +
               Map-reduce partition columns: rand() (type: double)
@@ -155,7 +206,7 @@ STAGE PLANS:
             output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
             serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
-  Stage: Stage-6
+  Stage: Stage-7
     Map Reduce
       Map Operator Tree:
           TableScan
@@ -184,6 +235,21 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 name: default.dest2
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: string)
+            outputColumnNames: key, value
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: true
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-1
     Move Operator
@@ -195,9 +261,27 @@ STAGE PLANS:
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             name: default.dest2
 
-  Stage: Stage-7
-    Stats Work
-      Basic Stats Work:
+  Stage: Stage-9
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: final
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: true
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 PREHOOK: query: FROM SRC
 INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key
diff --git ql/src/test/results/clientpositive/groupby7_noskew.q.out ql/src/test/results/clientpositive/groupby7_noskew.q.out
index 2303b3366b..7309ec281b 100644
--- ql/src/test/results/clientpositive/groupby7_noskew.q.out
+++ ql/src/test/results/clientpositive/groupby7_noskew.q.out
@@ -27,10 +27,12 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-0 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-0, Stage-4, Stage-7
   Stage-4 depends on stages: Stage-2
-  Stage-1 depends on stages: Stage-4
-  Stage-5 depends on stages: Stage-1
+  Stage-6 depends on stages: Stage-1, Stage-4, Stage-7
+  Stage-5 depends on stages: Stage-2
+  Stage-1 depends on stages: Stage-5
+  Stage-7 depends on stages: Stage-5
 
 STAGE PLANS:
   Stage: Stage-2
@@ -78,6 +80,16 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 name: default.dest1
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: string)
+            outputColumnNames: key, value
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: true
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -92,12 +104,46 @@ STAGE PLANS:
   Stage: Stage-3
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest1
 
   Stage: Stage-4
     Map Reduce
       Map Operator Tree:
           TableScan
             Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              value expressions: key (type: int), value (type: string)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll')
+          mode: complete
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: true
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-6
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest2
+
+  Stage: Stage-5
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
               key expressions: key (type: string)
               sort order: +
               Map-reduce partition columns: key (type: string)
@@ -122,6 +168,16 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 name: default.dest2
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: string)
+            outputColumnNames: key, value
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: true
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-1
     Move Operator
@@ -133,9 +189,27 @@ STAGE PLANS:
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             name: default.dest2
 
-  Stage: Stage-5
-    Stats Work
-      Basic Stats Work:
+  Stage: Stage-7
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              value expressions: key (type: int), value (type: string)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll')
+          mode: complete
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: true
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 PREHOOK: query: FROM SRC
 INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key
diff --git ql/src/test/results/clientpositive/groupby7_noskew_multi_single_reducer.q.out ql/src/test/results/clientpositive/groupby7_noskew_multi_single_reducer.q.out
index ff29aec5ef..486979fef0 100644
--- ql/src/test/results/clientpositive/groupby7_noskew_multi_single_reducer.q.out
+++ ql/src/test/results/clientpositive/groupby7_noskew_multi_single_reducer.q.out
@@ -28,10 +28,12 @@ STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-3 depends on stages: Stage-2
   Stage-0 depends on stages: Stage-3
-  Stage-4 depends on stages: Stage-0
-  Stage-5 depends on stages: Stage-2
-  Stage-1 depends on stages: Stage-5
-  Stage-6 depends on stages: Stage-1
+  Stage-4 depends on stages: Stage-0, Stage-5, Stage-8
+  Stage-5 depends on stages: Stage-3
+  Stage-7 depends on stages: Stage-1, Stage-5, Stage-8
+  Stage-6 depends on stages: Stage-2
+  Stage-1 depends on stages: Stage-6
+  Stage-8 depends on stages: Stage-6
 
 STAGE PLANS:
   Stage: Stage-2
@@ -108,6 +110,16 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 name: default.dest1
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: string)
+            outputColumnNames: key, value
+            Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: true
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -122,12 +134,46 @@ STAGE PLANS:
   Stage: Stage-4
     Stats Work
      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest1
 
   Stage: Stage-5
     Map Reduce
       Map Operator Tree:
           TableScan
             Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+              value expressions: key (type: int), value (type: string)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll')
+          mode: complete
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: true
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-7
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest2
+
+  Stage: Stage-6
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
               key expressions: _col0 (type: string)
               sort order: +
               Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
@@ -153,6 +199,16 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 name: default.dest2
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: string)
+            outputColumnNames: key, value
+            Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: true
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-1
     Move Operator
@@ -164,9 +220,27 @@ STAGE PLANS:
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             name: default.dest2
 
-  Stage: Stage-6
-    Stats Work
-      Basic Stats Work:
+  Stage: Stage-8
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+              value expressions: key (type: int), value (type: string)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll')
+          mode: complete
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: true
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 PREHOOK: query: FROM SRC
 INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key ORDER BY SRC.key limit 10
diff --git ql/src/test/results/clientpositive/groupby8.q.out ql/src/test/results/clientpositive/groupby8.q.out
index 826064cd53..03c6a18b97 100644
--- ql/src/test/results/clientpositive/groupby8.q.out
+++ ql/src/test/results/clientpositive/groupby8.q.out
@@ -28,11 +28,15 @@ STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-3 depends on stages: Stage-2
   Stage-0 depends on stages: Stage-3
-  Stage-4 depends on stages: Stage-0
-  Stage-5 depends on stages: Stage-2
+  Stage-4 depends on stages: Stage-0, Stage-6, Stage-11
+  Stage-5 depends on stages: Stage-3
   Stage-6 depends on stages: Stage-5
-  Stage-1 depends on stages: Stage-6
-  Stage-7 depends on stages: Stage-1
+  Stage-9 depends on stages: Stage-1, Stage-6, Stage-11
+  Stage-7 depends on stages: Stage-2
+  Stage-8 depends on stages: Stage-7
+  Stage-1 depends on stages: Stage-8
+  Stage-10 depends on stages: Stage-8
+  Stage-11 depends on stages: Stage-10
 
 STAGE PLANS:
   Stage: Stage-2
@@ -103,6 +107,16 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 name: default.dest1
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: string)
+            outputColumnNames: key, value
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -117,12 +131,68 @@ STAGE PLANS:
   Stage: Stage-4
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest1
 
   Stage: Stage-5
     Map Reduce
       Map Operator Tree:
           TableScan
             Reduce Output Operator
+              sort order:
+              Map-reduce partition columns: rand() (type: double)
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              value expressions: key (type: int), value (type: string)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll')
+          mode: partial1
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-6
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: final
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-9
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest2
+
+  Stage: Stage-7
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
               key expressions: key (type: string), substr(value, 5) (type: string)
               sort order: ++
               Map-reduce partition columns: key (type: string)
@@ -141,7 +211,7 @@ STAGE PLANS:
             output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
             serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
-  Stage: Stage-6
+  Stage: Stage-8
     Map Reduce
       Map Operator Tree:
          TableScan
@@ -170,6 +240,16 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 name: default.dest2
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: string)
+            outputColumnNames: key, value
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-1
     Move Operator
@@ -181,9 +261,49 @@ STAGE PLANS:
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             name: default.dest2
 
-  Stage: Stage-7
-    Stats Work
-      Basic Stats Work:
+  Stage: Stage-10
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Map-reduce partition columns: rand() (type: double)
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              value expressions: key (type: int), value (type: string)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll')
+          mode: partial1
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-11
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: final
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 PREHOOK: query: FROM SRC
 INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key
@@ -851,11 +971,15 @@ STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-3 depends on stages: Stage-2
   Stage-0 depends on stages: Stage-3
-  Stage-4 depends on stages: Stage-0
-  Stage-5 depends on stages: Stage-2
+  Stage-4 depends on stages: Stage-0, Stage-6, Stage-11
+  Stage-5 depends on stages: Stage-3
   Stage-6 depends on stages: Stage-5
-  Stage-1 depends on stages: Stage-6
-  Stage-7 depends on stages: Stage-1
+  Stage-9 depends on stages: Stage-1, Stage-6, Stage-11
+  Stage-7 depends on stages: Stage-2
+  Stage-8 depends on stages: Stage-7
+  Stage-1 depends on stages: Stage-8
+  Stage-10 depends on stages: Stage-8
+  Stage-11 depends on stages: Stage-10
 
 STAGE PLANS:
   Stage: Stage-2
@@ -926,6 +1050,16 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 name: default.dest1
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: string)
+            outputColumnNames: key, value
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -940,12 +1074,68 @@ STAGE PLANS:
   Stage: Stage-4
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest1
 
   Stage: Stage-5
     Map Reduce
       Map Operator Tree:
          TableScan
            Reduce Output Operator
+              sort order:
+              Map-reduce partition columns: rand() (type: double)
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              value expressions: key (type: int), value (type: string)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll')
+          mode: partial1
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-6
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: final
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-9
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest2
+
+  Stage: Stage-7
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
               key expressions: key (type: string), substr(value, 5) (type: string)
               sort order: ++
               Map-reduce partition columns: key (type: string)
@@ -964,7 +1154,7 @@ STAGE PLANS:
             output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
             serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
-  Stage: Stage-6
+  Stage: Stage-8
     Map Reduce
       Map Operator Tree:
          TableScan
@@ -993,6 +1183,16 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 name: default.dest2
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: string)
+            outputColumnNames: key, value
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-1
     Move Operator
@@ -1004,9 +1204,49 @@ STAGE PLANS:
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             name: default.dest2
 
-  Stage: Stage-7
-    Stats Work
-      Basic Stats Work:
+  Stage: Stage-10
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Map-reduce partition columns: rand() (type: double)
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              value expressions: key (type: int), value (type: string)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll')
+          mode: partial1
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-11
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: final
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 PREHOOK: query: FROM SRC
 INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key
diff --git ql/src/test/results/clientpositive/groupby8_map.q.out ql/src/test/results/clientpositive/groupby8_map.q.out
index abe813c2b6..5022edbe75 100644
--- ql/src/test/results/clientpositive/groupby8_map.q.out
+++ ql/src/test/results/clientpositive/groupby8_map.q.out
@@ -27,9 +27,11 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-0 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-0, Stage-4, Stage-6
+  Stage-4 depends on stages: Stage-2
+  Stage-5 depends on stages: Stage-1, Stage-4, Stage-6
   Stage-1 depends on stages: Stage-2
-  Stage-4 depends on stages: Stage-1
+  Stage-6 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-2
@@ -68,6 +70,21 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 name: default.dest1
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: string)
+            outputColumnNames: key, value
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
         Group By Operator
           aggregations: count(DISTINCT KEY._col1:0._col0)
          keys: KEY._col0 (type: string)
@@ -86,6 +103,21 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 name: default.dest2
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: string)
+            outputColumnNames: key, value
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -100,6 +132,40 @@ STAGE PLANS:
   Stage: Stage-3
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest1
+
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-5
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest2
 
   Stage: Stage-1
     Move Operator
@@ -111,9 +177,27 @@ STAGE PLANS:
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             name: default.dest2
 
-  Stage: Stage-4
-    Stats Work
-      Basic Stats Work:
+  Stage: Stage-6
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 PREHOOK: query: FROM SRC
 INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key
diff --git ql/src/test/results/clientpositive/groupby8_map_skew.q.out ql/src/test/results/clientpositive/groupby8_map_skew.q.out
index a4de8b4e06..51cfb9b03a 100644
--- ql/src/test/results/clientpositive/groupby8_map_skew.q.out
+++ ql/src/test/results/clientpositive/groupby8_map_skew.q.out
@@ -28,11 +28,13 @@ STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-3 depends on stages: Stage-2
   Stage-0 depends on stages: Stage-3
-  Stage-4 depends on stages: Stage-0
-  Stage-5 depends on stages: Stage-2
-  Stage-6 depends on stages: Stage-5
-  Stage-1 depends on stages: Stage-6
-  Stage-7 depends on stages: Stage-1
+  Stage-4 depends on stages: Stage-0, Stage-5, Stage-9
+  Stage-5 depends on stages: Stage-3
+  Stage-8 depends on stages: Stage-1, Stage-5, Stage-9
+  Stage-6 depends on stages: Stage-2
+  Stage-7 depends on stages: Stage-6
+  Stage-1 depends on stages: Stage-7
+  Stage-9 depends on stages: Stage-7
 
 STAGE PLANS:
   Stage: Stage-2
@@ -115,6 +117,21 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 name: default.dest1
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: string)
+            outputColumnNames: key, value
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -129,12 +146,46 @@ STAGE PLANS:
   Stage: Stage-4
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest1
 
   Stage: Stage-5
     Map Reduce
       Map Operator Tree:
          TableScan
            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: final
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-8
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest2
+
+  Stage: Stage-6
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
               key expressions: _col0 (type: string), _col1 (type: string)
               sort order: ++
               Map-reduce partition columns: _col0 (type: string)
@@ -153,7 +204,7 @@ STAGE PLANS:
             output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
             serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
-  Stage: Stage-6
+  Stage: Stage-7
     Map Reduce
       Map Operator Tree:
          TableScan
@@ -182,6 +233,21 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 name: default.dest2
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: string)
+            outputColumnNames: key, value
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-1
     Move Operator
@@ -193,9 +259,27 @@ STAGE PLANS:
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             name: default.dest2
 
-  Stage: Stage-7
-    Stats Work
-      Basic Stats Work:
+  Stage: Stage-9
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: final
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 PREHOOK: query: FROM SRC
 INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key
diff --git ql/src/test/results/clientpositive/groupby8_noskew.q.out ql/src/test/results/clientpositive/groupby8_noskew.q.out
index abe813c2b6..eabc90b3a7 100644
--- ql/src/test/results/clientpositive/groupby8_noskew.q.out
+++ ql/src/test/results/clientpositive/groupby8_noskew.q.out
@@ -27,9 +27,11 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-0 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-0, Stage-4, Stage-6
+  Stage-4 depends on stages: Stage-2
+  Stage-5 depends on stages: Stage-1, Stage-4, Stage-6
   Stage-1 depends on stages: Stage-2
-  Stage-4 depends on stages: Stage-1
+  Stage-6 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-2
@@ -68,6 +70,16 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 name: default.dest1
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: string)
+            outputColumnNames: key, value
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
         Group By Operator
           aggregations: count(DISTINCT KEY._col1:0._col0)
          keys: KEY._col0 (type: string)
@@ -86,6 +98,16 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 name: default.dest2
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: string)
+            outputColumnNames: key, value
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -100,6 +122,40 @@ STAGE PLANS:
   Stage: Stage-3
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest1
+
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              value expressions: key (type: int), value (type: string)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll')
+          mode: complete
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-5
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest2
 
   Stage: Stage-1
     Move Operator
@@ -111,9 +167,27 @@ STAGE PLANS:
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             name: default.dest2
 
-  Stage: Stage-4
-    Stats Work
-      Basic Stats Work:
+  Stage: Stage-6
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              value expressions: key (type: int), value (type: string)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll')
+          mode: complete
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 PREHOOK: query: FROM SRC
 INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key
diff --git ql/src/test/results/clientpositive/groupby9.q.out ql/src/test/results/clientpositive/groupby9.q.out
index 28032e324f..2daa452479 100644
--- ql/src/test/results/clientpositive/groupby9.q.out
+++ ql/src/test/results/clientpositive/groupby9.q.out
@@ -27,10 +27,12 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-0 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-0, Stage-4, Stage-7
   Stage-4 depends on stages: Stage-2
-  Stage-1 depends on stages: Stage-4
-  Stage-5 depends on stages: Stage-1
+  Stage-6 depends on stages: Stage-1, Stage-4, Stage-7
+  Stage-5 depends on stages: Stage-2
+  Stage-1 depends on stages: Stage-5
+  Stage-7 depends on stages: Stage-5
 
 STAGE PLANS:
   Stage: Stage-2
@@ -89,6 +91,21 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 name: default.dest1
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: string)
+            outputColumnNames: key, value
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -103,12 +120,46 @@ STAGE PLANS:
   Stage: Stage-3
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest1
 
   Stage: Stage-4
     Map Reduce
       Map Operator Tree:
          TableScan
            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-6
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, val1, val2
+          Column Types: int, string, string
+          Table: default.dest2
+
+  Stage: Stage-5
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
               key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
               sort order: +++
               Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
@@ -132,6 +183,21 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 name: default.dest2
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
+            outputColumnNames: key, val1, val2
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-1
     Move Operator
@@ -143,9 +209,27 @@ STAGE PLANS:
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             name: default.dest2
 
-  Stage: Stage-5
-    Stats Work
-      Basic Stats Work:
+  Stage: Stage-7
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 PREHOOK: query: FROM SRC
 INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key
@@ -813,10 +897,12 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-0 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-0, Stage-4, Stage-7
   Stage-4 depends on stages: Stage-2
-  Stage-1 depends on stages: Stage-4
-  Stage-5 depends on stages: Stage-1
+  Stage-6 depends on stages: Stage-1, Stage-4, Stage-7
+  Stage-5 depends on stages: Stage-2
+  Stage-1 depends on stages: Stage-5
+  Stage-7 depends on stages: Stage-5
 
 STAGE PLANS:
   Stage: Stage-2
@@ -875,6 +961,21 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 name: default.dest1
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: string)
+            outputColumnNames: key, value
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -889,12 +990,46 @@ STAGE PLANS:
   Stage: Stage-3
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest1
 
   Stage: Stage-4
     Map Reduce
       Map Operator Tree:
          TableScan
            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-6
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, val1, val2
+          Column Types: int, string, string
+          Table: default.dest2
+
+  Stage: Stage-5
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
               key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
               sort order: +++
               Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
@@ -918,6 +1053,21 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 name: default.dest2
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
+            outputColumnNames: key, val1, val2
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-1
     Move Operator
@@ -929,9 +1079,27 @@ STAGE PLANS:
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             name: default.dest2
 
-  Stage: Stage-5
-    Stats Work
-      Basic Stats Work:
+  Stage: Stage-7
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 PREHOOK: query: FROM SRC
 INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key
@@ -1599,10 +1767,12 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-0 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-0, Stage-4, Stage-7
   Stage-4 depends on stages: Stage-2
-  Stage-1 depends on stages: Stage-4
-  Stage-5 depends on stages: Stage-1
+  Stage-6 depends on stages: Stage-1, Stage-4, Stage-7
+  Stage-5 depends on stages: Stage-2
+  Stage-1 depends on stages: Stage-5
+  Stage-7 depends on stages: Stage-5
 
 STAGE PLANS:
   Stage: Stage-2
@@ -1661,6 +1831,21 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 name: default.dest1
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: string)
+            outputColumnNames: key, value
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -1675,12 +1860,46 @@ STAGE PLANS:
   Stage: Stage-3
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest1
 
   Stage: Stage-4
     Map Reduce
       Map Operator Tree:
          TableScan
            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-6
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, val1, val2
+          Column Types: int, string, string
+          Table: default.dest2
+
+  Stage: Stage-5
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
               key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
               sort order: +++
               Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
@@ -1704,6 +1923,21 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 name: default.dest2
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
+            outputColumnNames: key, val1, val2
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-1
     Move Operator
@@ -1715,9 +1949,27 @@ STAGE PLANS:
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             name: default.dest2
 
-  Stage: Stage-5
-    Stats Work
-      Basic Stats Work:
+  Stage: Stage-7
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 PREHOOK: query: FROM SRC
 INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key
@@ -2385,10 +2637,12 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-0 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-0, Stage-4, Stage-7
   Stage-4 depends on stages: Stage-2
-  Stage-1 depends on stages: Stage-4
-  Stage-5 depends on stages: Stage-1
+  Stage-6 depends on stages: Stage-1, Stage-4, Stage-7
+  Stage-5 depends on stages: Stage-2
+  Stage-1 depends on stages: Stage-5
+  Stage-7 depends on stages: Stage-5
 
 STAGE PLANS:
   Stage: Stage-2
@@ -2448,6 +2702,21 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 name: default.dest1
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: string)
+            outputColumnNames: key, value
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -2462,12 +2731,46 @@ STAGE PLANS:
   Stage: Stage-3
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest1
 
   Stage: Stage-4
     Map Reduce
       Map Operator Tree:
          TableScan
            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-6
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, val1, val2
+          Column Types: int, string, string
+          Table: default.dest2
+
+  Stage: Stage-5
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
               key expressions: _col0 (type: string), _col1 (type: string)
               sort order: ++
               Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
@@ -2492,6 +2795,21 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 name: default.dest2
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
+            outputColumnNames: key, val1, val2
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-1
     Move Operator
@@ -2503,9 +2821,27 @@ STAGE PLANS:
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             name: default.dest2
 
-  Stage: Stage-5
-    Stats Work
-      Basic Stats Work:
+  Stage: Stage-7
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 PREHOOK: query: FROM SRC
 INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(SUBSTR(SRC.value,5)) GROUP BY SRC.key
@@ -3173,10 +3509,12 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-0 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-0, Stage-4, Stage-7
   Stage-4 depends on stages: Stage-2
-  Stage-1 depends on stages: Stage-4
-  Stage-5 depends on stages: Stage-1
+  Stage-6 depends on stages: Stage-1, Stage-4, Stage-7
+  Stage-5 depends on stages: Stage-2
+  Stage-1 depends on stages: Stage-5
+  Stage-7 depends on stages: Stage-5
 
 STAGE PLANS:
   Stage: Stage-2
@@ -3235,6 +3573,21 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 name: default.dest1
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: string)
+            outputColumnNames: key, value
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -3249,12 +3602,46 @@ STAGE PLANS:
   Stage: Stage-3
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest1
 
   Stage: Stage-4
     Map Reduce
      Map Operator Tree:
          TableScan
            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-6
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, val1, val2
+          Column Types: int, string, string
+          Table: default.dest2
+
+  Stage: Stage-5
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
               key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
               sort order: +++
               Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
@@ -3278,6 +3665,21 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 name: default.dest2
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
+            outputColumnNames: key, val1, val2
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-1
     Move Operator
@@ -3289,9 +3691,27 @@ STAGE PLANS:
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             name: default.dest2
 
-  Stage: Stage-5
-    Stats Work
-      Basic Stats Work:
+  Stage: Stage-7
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: +
+              Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 PREHOOK: query: FROM SRC
 INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key
diff --git ql/src/test/results/clientpositive/groupby_cube1.q.out ql/src/test/results/clientpositive/groupby_cube1.q.out
index 38243592b4..94f1ac0528 100644
--- ql/src/test/results/clientpositive/groupby_cube1.q.out
+++ ql/src/test/results/clientpositive/groupby_cube1.q.out
@@ -551,11 +551,13 @@ STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-3 depends on stages: Stage-2
   Stage-0 depends on stages: Stage-3
-  Stage-4 depends on stages: Stage-0
-  Stage-5 depends on stages: Stage-2
-  Stage-6 depends on stages: Stage-5
-  Stage-1 depends on stages: Stage-6
-  Stage-7 depends on stages: Stage-1
+  Stage-4 depends on stages: Stage-0, Stage-5, Stage-9
+  Stage-5 depends on stages: Stage-3
+  Stage-8 depends on stages: Stage-1, Stage-5, Stage-9
+  Stage-6 depends on stages: Stage-2
+  Stage-7 depends on stages: Stage-6
+  Stage-1 depends on stages: Stage-7
+  Stage-9 depends on stages: Stage-7
 
 STAGE PLANS:
   Stage: Stage-2
@@ -640,6 +642,21 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 name: default.t2
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int)
+            outputColumnNames: key1, key2, val
+            Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(val, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -654,12 +671,46 @@ STAGE PLANS:
   Stage: Stage-4
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key1, key2, val
+          Column Types: string, string, int
+          Table: default.t2
 
   Stage: Stage-5
     Map Reduce
       Map Operator Tree:
          TableScan
            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+          mode: final
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-8
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key1, key2, val
+          Column Types: string, string, int
+          Table: default.t3
+
+  Stage: Stage-6
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
               key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int)
               sort order: +++
               Map-reduce partition columns: rand() (type: double)
@@ -679,7 +730,7 @@ STAGE PLANS:
             output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
             serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
-  Stage: Stage-6
+  Stage: Stage-7
     Map Reduce
       Map Operator Tree:
          TableScan
@@ -709,6 +760,21 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 name: default.t3
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int)
+            outputColumnNames: key1, key2, val
+            Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(val, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-1
     Move Operator
@@ -720,9 +786,27 @@ STAGE PLANS:
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             name: default.t3
 
-  Stage: Stage-7
-    Stats Work
-      Basic Stats Work:
+  Stage: Stage-9
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+          mode: final
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 PREHOOK: query: FROM T1
 INSERT OVERWRITE TABLE T2 SELECT key, val, count(1) group by key, val with cube
diff --git ql/src/test/results/clientpositive/groupby_cube_multi_gby.q.out ql/src/test/results/clientpositive/groupby_cube_multi_gby.q.out
index 0f77b3d1ca..9a6457cef2 100644
--- ql/src/test/results/clientpositive/groupby_cube_multi_gby.q.out
+++ ql/src/test/results/clientpositive/groupby_cube_multi_gby.q.out
@@ -33,10 +33,12 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-0 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-0, Stage-4, Stage-7
 Stage-4 depends
on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1, Stage-4, Stage-7 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -92,6 +94,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -106,12 +123,46 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.t1 Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.t2 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) @@ -131,6 +182,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -142,7 +208,25 @@ STAGE PLANS: serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t2 - Stage: Stage-5 - Stats Work - Basic Stats Work: + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe diff --git ql/src/test/results/clientpositive/groupby_map_ppr.q.out ql/src/test/results/clientpositive/groupby_map_ppr.q.out index e0952c8cdf..1a3d0e6448 100644 --- ql/src/test/results/clientpositive/groupby_map_ppr.q.out +++ ql/src/test/results/clientpositive/groupby_map_ppr.q.out @@ -23,7 +23,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -198,6 +199,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + outputColumnNames: key, c1, c2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -231,6 +259,80 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, c1, c2 + Column Types: string, int, string + Table: default.dest1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat 
+ properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: FROM srcpart src INSERT OVERWRITE TABLE dest1 diff --git ql/src/test/results/clientpositive/groupby_map_ppr_multi_distinct.q.out ql/src/test/results/clientpositive/groupby_map_ppr_multi_distinct.q.out index 3c53a58455..02cb101d0a 100644 --- ql/src/test/results/clientpositive/groupby_map_ppr_multi_distinct.q.out +++ ql/src/test/results/clientpositive/groupby_map_ppr_multi_distinct.q.out @@ -23,7 +23,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -198,6 +199,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2168 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3,_col4 + columns.types 
struct,struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -231,6 +259,80 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 2168 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3,_col4 + columns.types struct,struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3,_col4 + columns.types struct,struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2216 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 2216 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4 + columns.types struct:struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: FROM srcpart src INSERT OVERWRITE TABLE dest1 diff --git ql/src/test/results/clientpositive/groupby_multi_insert_common_distinct.q.out ql/src/test/results/clientpositive/groupby_multi_insert_common_distinct.q.out index 6606edb9a1..be442dc1c9 100644 --- 
ql/src/test/results/clientpositive/groupby_multi_insert_common_distinct.q.out +++ ql/src/test/results/clientpositive/groupby_multi_insert_common_distinct.q.out @@ -27,10 +27,12 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1, Stage-4, Stage-7 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -89,6 +91,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 856 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -103,12 +120,46 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest1 Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 856 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest2 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: double), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: double) @@ -132,6 +183,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 856 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input 
format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -143,9 +209,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-5 - Stats Work - Basic Stats Work: + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 856 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: from src insert overwrite table dest1 select key, count(distinct value) group by key diff --git ql/src/test/results/clientpositive/groupby_multi_single_reducer2.q.out ql/src/test/results/clientpositive/groupby_multi_single_reducer2.q.out index daa047db85..000caa660f 100644 --- ql/src/test/results/clientpositive/groupby_multi_single_reducer2.q.out +++ ql/src/test/results/clientpositive/groupby_multi_single_reducer2.q.out @@ -27,9 +27,11 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4, Stage-6 + Stage-4 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-1, Stage-4, Stage-6 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -71,6 +73,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, c1 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (KEY._col0 < 5) (type: boolean) Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE @@ -92,6 +109,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g3 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, c1, c2 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), 
compute_stats(c1, 'hll'), compute_stats(c2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -106,6 +138,40 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, c1 + Column Types: string, int + Table: default.dest_g2 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, c1, c2 + Column Types: string, int, int + Table: default.dest_g3 Stage: Stage-1 Move Operator @@ -117,9 +183,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g3 - Stage: Stage-4 - Stats Work - Basic Stats Work: + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT src.key) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1) diff --git ql/src/test/results/clientpositive/groupby_multi_single_reducer3.q.out ql/src/test/results/clientpositive/groupby_multi_single_reducer3.q.out index 496580b969..a0ad67b5da 100644 --- ql/src/test/results/clientpositive/groupby_multi_single_reducer3.q.out +++ ql/src/test/results/clientpositive/groupby_multi_single_reducer3.q.out @@ -39,9 +39,11 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4, Stage-6 + Stage-4 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-1, Stage-4, Stage-6 Stage-1 depends on 
stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -83,6 +85,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((KEY._col0) IN (400, 450) and (VALUE._col0) IN ('val_400', 'val_500')) (type: boolean) Statistics: Num rows: 63 Data size: 669 Basic stats: COMPLETE Column stats: NONE @@ -104,6 +121,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -118,6 +150,40 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e2 Stage: Stage-1 Move Operator @@ -129,9 +195,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 - Stage: Stage-4 - Stats Work - Basic Stats Work: + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: 
NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: from src insert overwrite table e1 @@ -207,9 +291,11 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4, Stage-6 + Stage-4 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-1, Stage-4, Stage-6 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -251,6 +337,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((((VALUE._col0 - 100) = 500) and KEY._col0 is not null) or ((VALUE._col0 + VALUE._col0) = 400)) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -272,6 +373,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -286,6 +402,40 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group 
By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e2 Stage: Stage-1 Move Operator @@ -297,9 +447,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 - Stage: Stage-4 - Stats Work - Basic Stats Work: + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: from src insert overwrite table e1 @@ -375,9 +543,11 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4, Stage-6 + Stage-4 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-1, Stage-4, Stage-6 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -419,6 +589,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((KEY._col0) IN (400, 450) and (VALUE._col0) IN ('val_400', 'val_500')) (type: boolean) Statistics: Num rows: 63 Data size: 669 Basic stats: COMPLETE Column stats: NONE @@ -440,6 +625,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + 
Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -454,6 +654,40 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e2 Stage: Stage-1 Move Operator @@ -465,9 +699,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 - Stage: Stage-4 - Stats Work - Basic Stats Work: + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: from src insert overwrite table e1 @@ -543,9 +795,11 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4, Stage-6 + Stage-4 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-1, Stage-4, Stage-6 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -587,6 +841,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: 
Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((((VALUE._col0 - 100) = 500) and KEY._col0 is not null) or ((VALUE._col0 + VALUE._col0) = 400)) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -608,6 +877,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -622,6 +906,40 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e2 Stage: Stage-1 Move Operator @@ -633,9 +951,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 - Stage: Stage-4 - Stats Work - Basic Stats Work: + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 
1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: from src insert overwrite table e1 diff --git ql/src/test/results/clientpositive/groupby_position.q.out ql/src/test/results/clientpositive/groupby_position.q.out index 16924f3459..0af33d4fe3 100644 --- ql/src/test/results/clientpositive/groupby_position.q.out +++ ql/src/test/results/clientpositive/groupby_position.q.out @@ -27,10 +27,12 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1, Stage-4, Stage-7 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -87,6 +89,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.testtable1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -101,12 +118,46 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.testtable1 Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, string, string + Table: default.testtable2 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) @@ -130,6 +181,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.testtable2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -141,9 +207,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.testtable2 - Stage: Stage-5 - Stats Work - Basic Stats Work: + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE testTable1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1 @@ -219,10 +303,12 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1, Stage-4, Stage-7 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -279,6 +365,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.testtable1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -293,12 +394,46 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, 
value + Column Types: int, string + Table: default.testtable1 Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, string, string + Table: default.testtable2 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) @@ -322,6 +457,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.testtable2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -333,9 +483,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.testtable2 - Stage: Stage-5 - Stats Work - Basic Stats Work: + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE testTable1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1 diff --git ql/src/test/results/clientpositive/groupby_ppr.q.out 
ql/src/test/results/clientpositive/groupby_ppr.q.out index 8a18187eb7..4cf530e8c5 100644 --- ql/src/test/results/clientpositive/groupby_ppr.q.out +++ ql/src/test/results/clientpositive/groupby_ppr.q.out @@ -23,7 +23,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -191,6 +192,28 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + outputColumnNames: key, c1, c2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns key,c1,c2 + columns.types string,int,string + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -224,6 +247,80 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, c1, c2 + Column Types: string, int, string + Table: default.dest1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: key (type: string), c1 (type: int), c2 (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns key,c1,c2 + columns.types string,int,string + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns key,c1,c2 + columns.types string,int,string + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input 
format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: FROM srcpart src INSERT OVERWRITE TABLE dest1 diff --git ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out index 6e4501d022..ff5e74c24c 100644 --- ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out +++ ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out @@ -23,7 +23,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -191,6 +192,28 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns key,c1,c2,c3,c4 + columns.types string,int,string,int,int + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -224,6 +247,80 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: key (type: string), c1 (type: int), c2 (type: string), c3 (type: int), c4 (type: int) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns key,c1,c2,c3,c4 + columns.types string,int,string,int,int + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns 
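The reduce-side aggregation in Stage-3 is an ordinary UDAF call, so the struct each branch produces can be inspected directly. A sketch, assuming compute_stats is invocable from a plain query with the 'hll' argument, as it is in the generated plans:

-- one stats struct per column; fields vary by type (distinct-count
-- estimate, null count, max length for strings, min/max for numerics)
SELECT compute_stats(key, 'hll'),
       compute_stats(value, 'hll')
FROM src;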
key,c1,c2,c3,c4 + columns.types string,int,string,int,int + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll'), compute_stats(VALUE._col4, 'hll'), compute_stats(VALUE._col5, 'hll') + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4 + columns.types struct:struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: FROM srcpart src INSERT OVERWRITE TABLE dest1 @@ -285,7 +382,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -432,7 +530,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"c1":"true","c2":"true","c3":"true","c4":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,c1,c2,c3,c4 @@ -453,6 +551,28 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns key,c1,c2,c3,c4 + columns.types string,int,string,int,int + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -463,7 +583,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"c1":"true","c2":"true","c3":"true","c4":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,c1,c2,c3,c4 @@ -486,6 +606,84 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: key (type: string), c1 (type: int), c2 (type: string), c3 (type: int), c4 (type: int) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns key,c1,c2,c3,c4 + columns.types string,int,string,int,int + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns key,c1,c2,c3,c4 + columns.types string,int,string,int,int + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll'), compute_stats(VALUE._col4, 'hll'), compute_stats(VALUE._col5, 'hll') + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2232 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2232 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 2232 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4 + columns.types struct:struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: FROM srcpart src INSERT OVERWRITE TABLE dest1 diff --git ql/src/test/results/clientpositive/groupby_rollup1.q.out ql/src/test/results/clientpositive/groupby_rollup1.q.out index b7e93d9c25..5ccf8f2285 
100644 --- ql/src/test/results/clientpositive/groupby_rollup1.q.out +++ ql/src/test/results/clientpositive/groupby_rollup1.q.out @@ -396,11 +396,13 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-0 - Stage-5 depends on stages: Stage-2 - Stage-6 depends on stages: Stage-5 - Stage-1 depends on stages: Stage-6 - Stage-7 depends on stages: Stage-1 + Stage-4 depends on stages: Stage-0, Stage-5, Stage-9 + Stage-5 depends on stages: Stage-3 + Stage-8 depends on stages: Stage-1, Stage-5, Stage-9 + Stage-6 depends on stages: Stage-2 + Stage-7 depends on stages: Stage-6 + Stage-1 depends on stages: Stage-7 + Stage-9 depends on stages: Stage-7 STAGE PLANS: Stage: Stage-2 @@ -485,6 +487,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key1, key2, val + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(val, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -499,12 +516,46 @@ STAGE PLANS: Stage: Stage-4 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key1, key2, val + Column Types: string, string, int + Table: default.t2 Stage: Stage-5 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-8 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key1, key2, val + Column Types: string, string, int + Table: default.t3 + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ Map-reduce partition columns: rand() (type: double) @@ -524,7 +575,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-6 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -554,6 +605,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key1, key2, val + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(val, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -565,9 +631,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t3 - Stage: Stage-7 - Stats Work - Basic Stats Work: + Stage: Stage-9 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM T1 INSERT OVERWRITE TABLE T2 SELECT key, val, count(1) group by key, val with rollup diff --git ql/src/test/results/clientpositive/groupby_sort_1_23.q.out ql/src/test/results/clientpositive/groupby_sort_1_23.q.out index 80ec75aff5..cbfd6bf627 100644 --- ql/src/test/results/clientpositive/groupby_sort_1_23.q.out +++ ql/src/test/results/clientpositive/groupby_sort_1_23.q.out @@ -106,6 +106,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -161,6 +177,35 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + 
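The groupby_rollup1.q.out hunks above show the multi-insert case: each target table (t2, then t3) gets its own compute_stats branch and its own Stats Work stage, and the merge reducers run in mode: final because the map side already hashed the partials. A condensed sketch of the driving statement; the t2 insert matches the test, while the t3 column list and aggregate are illustrative since that part of the query is not shown here:

-- one stats branch and one Stats Work stage per INSERT target
FROM t1
INSERT OVERWRITE TABLE t2
  SELECT key, val, count(1) GROUP BY key, val WITH ROLLUP
INSERT OVERWRITE TABLE t3
  SELECT key, val, count(1) GROUP BY key, val WITH ROLLUP;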
Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -203,6 +248,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true Stage: Stage-3 Map Reduce @@ -428,7 +478,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -556,6 +607,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) + outputColumnNames: key1, key2, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -589,6 +667,80 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key1, key2, cnt + Column Types: int, string, int + Table: default.outputtbl2 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: 
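The first groupby_sort_1_23.q.out plan is instructive: the group-by over the sorted, bucketed T1 is answered entirely map-side, so the job used to be map-only, and the only reduce phase it now carries exists to merge the partial compute_stats structs (the mergepartial Group By above). A sketch of a query that should produce this shape, with an illustrative table layout standing in for the test's T1:

-- sorted/bucketed source lets the group-by complete in the mapper
CREATE TABLE t1_demo (key STRING, val STRING)
CLUSTERED BY (key) SORTED BY (key ASC) INTO 2 BUCKETS;

CREATE TABLE outputtbl1_demo (key INT, cnt INT);

-- the remaining reducer in the plan serves only the stats merge
EXPLAIN EXTENDED
INSERT OVERWRITE TABLE outputtbl1_demo
SELECT key, count(1) FROM t1_demo GROUP BY key;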
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: INSERT OVERWRITE TABLE outputTbl2 SELECT key, val, count(1) FROM T1 GROUP BY key, val @@ -668,7 +820,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -689,6 +841,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -744,6 +912,35 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
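When the stats partials are written to an intermediate file instead of being shuffled inside the writing job, a standalone Stage-3 Map Reduce picks them up: its Path -> Partition entry scans the temporary output (base file name: -mr-10002), merges the structs, and the Stats Work stage persists the result via Column Stats Desc. The outputtbl2 query from the test triggers exactly this split:

-- EXPLAIN EXTENDED exposes the intermediate stats file consumed by
-- the follow-up stage (shown above as base file name: -mr-10002)
EXPLAIN EXTENDED
INSERT OVERWRITE TABLE outputTbl2
SELECT key, val, count(1) FROM T1 GROUP BY key, val;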
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -763,7 +960,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -786,6 +983,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true Stage: Stage-3 Map Reduce @@ -801,7 +1003,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -831,7 +1033,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -852,7 +1054,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -888,7 +1090,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -918,7 +1120,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -939,7 +1141,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1043,7 +1245,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + 
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1064,6 +1266,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1119,6 +1337,35 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -1138,7 +1385,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1161,6 +1408,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true Stage: Stage-3 Map Reduce @@ -1176,7 +1428,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1206,7 +1458,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1227,7 +1479,7 @@ STAGE PLANS: input format: 
org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1263,7 +1515,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1293,7 +1545,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1314,7 +1566,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1447,6 +1699,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key1, key2, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1502,6 +1770,35 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -1544,6 
+1841,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key1, key2, cnt + Column Types: int, int, int + Table: default.outputtbl3 + Is Table Level Stats: true Stage: Stage-3 Map Reduce @@ -1770,7 +2072,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -1898,6 +2201,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int) + outputColumnNames: key1, key2, key3, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(key3, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -1931,39 +2261,113 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key1, key2, key3, cnt + Column Types: int, int, string, int + Table: default.outputtbl4 + Is Table Level Stats: true -PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4 -SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Output: default@outputtbl4 -POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl4 -SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Output: default@outputtbl4 -POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ] -POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] -POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] -POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] -PREHOOK: query: SELECT * FROM outputTbl4 -PREHOOK: type: QUERY -PREHOOK: Input: default@outputtbl4 + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false + Path -> Alias: #### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM outputTbl4 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@outputtbl4 + Path -> Partition: #### A masked pattern was here #### -1 1 11 1 -2 1 12 1 -3 1 13 1 -7 1 17 1 -8 1 18 1 -8 1 28 1 -PREHOOK: query: EXPLAIN EXTENDED -INSERT OVERWRITE TABLE 
outputTbl3 -SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1 -PREHOOK: type: QUERY + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + +PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4 +SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Output: default@outputtbl4 +POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl4 +SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Output: default@outputtbl4 +POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ] +POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] +POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] +PREHOOK: query: SELECT * FROM outputTbl4 +PREHOOK: type: QUERY +PREHOOK: Input: default@outputtbl4 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM outputTbl4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@outputtbl4 +#### A masked pattern was here #### +1 1 11 1 +2 1 12 1 +3 1 13 1 +7 1 17 1 +8 1 18 1 +8 1 28 1 +PREHOOK: query: EXPLAIN EXTENDED +INSERT OVERWRITE TABLE outputTbl3 +SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1 +PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl3 SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1 @@ -1971,7 +2375,8 @@ 
POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -2078,7 +2483,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,cnt @@ -2099,6 +2504,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key1, key2, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -2109,7 +2541,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,cnt @@ -2132,6 +2564,80 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key1, key2, cnt + Column Types: int, int, int + Table: default.outputtbl3 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns 
_col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: INSERT OVERWRITE TABLE outputTbl3 SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1 @@ -2174,7 +2680,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -2291,7 +2798,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2312,6 +2819,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -2322,7 +2856,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} 
bucket_count -1 column.name.delimiter , columns key,cnt @@ -2345,6 +2879,80 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT key + key, sum(cnt) from @@ -2436,7 +3044,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2457,6 +3065,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 
Data size: 848 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false TableScan alias: t1 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE @@ -2488,7 +3112,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2509,6 +3133,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2564,6 +3204,35 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [null-subquery1:$hdt$_0-subquery1:t1, null-subquery2:$hdt$_0-subquery2:t1] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -2583,7 +3252,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2606,6 +3275,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true Stage: Stage-3 Map Reduce @@ -2621,7 +3295,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: 
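The union plan above gathers partials in both branches: each side of the union carries its own hash-mode compute_stats Group By, and a single mergepartial reducer combines them. The INSERT shown there is truncated after the FROM, so the subquery below is a reconstruction consistent with the two t1 sub-aliases in Truncated Path -> Alias:

-- both union branches emit compute_stats partials; one reducer merges them
EXPLAIN
INSERT OVERWRITE TABLE outputTbl1
SELECT key + key, sum(cnt) FROM (
  SELECT key, count(1) AS cnt FROM T1 GROUP BY key
  UNION ALL
  SELECT key, count(1) AS cnt FROM T1 GROUP BY key
) subq
GROUP BY key + key;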
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2651,7 +3325,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2672,7 +3346,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2708,7 +3382,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2738,7 +3412,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2759,7 +3433,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2999,7 +3673,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3020,6 +3694,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false TableScan GatherStats: false Union @@ -3039,7 +3729,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} 
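Across these golden files the COLUMN_STATS_ACCURATE table property, previously the bare {"BASIC_STATS":"true"}, now also records which columns carry accurate statistics, and each Stats Work stage gains a matching Column Stats Desc block. A minimal sketch for verifying this from a Hive session, assuming the tests' toy schema (t1/outputtbl1 as in the hunks above; names are illustrative, not part of the patch):

set hive.stats.column.autogather=true;
INSERT OVERWRITE TABLE outputtbl1 SELECT key, count(1) FROM t1 GROUP BY key;
DESCRIBE FORMATTED outputtbl1;
-- Table Parameters should now include the per-column map, e.g.:
-- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}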
bucket_count -1 column.name.delimiter , columns key,cnt @@ -3060,6 +3750,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3138,6 +3844,35 @@ STAGE PLANS: Truncated Path -> Alias: /t1 [null-subquery1:$hdt$_0-subquery1:t1] #### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-8 Conditional Operator @@ -3157,7 +3892,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3180,6 +3915,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true Stage: Stage-4 Map Reduce @@ -3195,7 +3935,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3225,7 +3965,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3246,7 +3986,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3282,7 +4022,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3312,7 +4052,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3333,7 +4073,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3418,7 +4158,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -3550,7 +4291,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3571,6 +4312,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -3581,7 +4349,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3604,6 +4372,80 @@ STAGE PLANS: Stats Work Basic Stats Work: #### 
A masked pattern was here #### + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT subq1.key, subq1.cnt+subq2.cnt FROM @@ -3951,7 +4793,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -3990,7 +4833,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -4013,7 +4856,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -4059,7 +4902,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -4080,6 +4923,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -4090,7 +4960,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -4106,13 +4976,87 @@ STAGE PLANS: serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 20 #### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - - Stage: Stage-2 - Stats Work - Basic Stats Work: + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 + + Stage: Stage-2 + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T2 GROUP BY key @@ -4190,7 +5134,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -4211,6 +5155,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int) + outputColumnNames: key1, key2, key3, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(key3, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -4220,7 +5180,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -4243,7 +5203,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -4266,6 +5226,35 @@ STAGE PLANS: name: default.t2 Truncated Path -> Alias: /t2 [t2] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -4285,7 +5274,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -4308,6 +5297,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key1, key2, key3, cnt + Column Types: int, int, string, int + Table: default.outputtbl4 + Is Table Level Stats: true Stage: Stage-3 Map Reduce @@ -4323,7 +5317,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -4353,7 +5347,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -4374,7 +5368,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -4410,7 +5404,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + 
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -4440,7 +5434,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -4461,7 +5455,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -4597,6 +5591,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key1, key2, key3, key4, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(key3, 'hll'), compute_stats(key4, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2136 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 2136 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -4606,7 +5616,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -4629,7 +5639,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -4652,6 +5662,35 @@ STAGE PLANS: name: default.t2 Truncated Path -> Alias: /t2 [t2] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4 + columns.types struct:struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -4694,6 +5733,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key1, key2, key3, key4, cnt + Column Types: int, int, string, int, int + Table: default.outputtbl5 + Is Table Level Stats: true Stage: Stage-3 Map Reduce @@ -4961,7 +6005,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -4982,6 +6026,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int) + outputColumnNames: key1, key2, key3, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(key3, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -4991,7 +6051,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -5014,7 +6074,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -5037,6 +6097,35 @@ STAGE PLANS: name: default.t2 Truncated Path -> Alias: /t2 [t2] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + 
NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -5056,7 +6145,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5079,6 +6168,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key1, key2, key3, cnt + Column Types: int, int, string, int + Table: default.outputtbl4 + Is Table Level Stats: true Stage: Stage-3 Map Reduce @@ -5094,7 +6188,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5124,7 +6218,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5145,7 +6239,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5181,7 +6275,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5211,7 +6305,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5232,7 +6326,7 @@ STAGE PLANS: input format: 
org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5353,7 +6447,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5374,6 +6468,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int) + outputColumnNames: key1, key2, key3, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(key3, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -5383,7 +6493,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -5406,7 +6516,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -5429,6 +6539,35 @@ STAGE PLANS: name: default.t2 Truncated Path -> Alias: /t2 [t2] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + 
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -5448,7 +6587,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5471,6 +6610,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key1, key2, key3, cnt + Column Types: int, int, string, int + Table: default.outputtbl4 + Is Table Level Stats: true Stage: Stage-3 Map Reduce @@ -5486,7 +6630,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5516,7 +6660,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5537,7 +6681,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5573,7 +6717,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5603,7 +6747,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5624,7 +6768,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5719,9 +6863,11 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: 
Stage-0, Stage-4, Stage-6 + Stage-4 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-1, Stage-4, Stage-6 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -5769,6 +6915,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, val, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(val, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -5788,6 +6949,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -5802,6 +6978,40 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val, cnt + Column Types: int, string, int + Table: default.dest2 Stage: Stage-1 Move Operator @@ -5813,9 +7023,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-4 - Stats Work - Basic Stats Work: + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort 
order: + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM T2 INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key @@ -5876,9 +7104,11 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4, Stage-6 + Stage-4 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-1, Stage-4, Stage-6 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -5929,6 +7159,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, val, cnt + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(val, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -5948,6 +7193,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -5962,6 +7222,40 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + value 
expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val, cnt + Column Types: int, string, int + Table: default.dest2 Stage: Stage-1 Move Operator @@ -5973,9 +7267,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-4 - Stats Work - Basic Stats Work: + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM (select key, val from T2 where key = 8) x INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key diff --git ql/src/test/results/clientpositive/groupby_sort_2.q.out ql/src/test/results/clientpositive/groupby_sort_2.q.out index 9d4143660b..25ed6b6f8f 100644 --- ql/src/test/results/clientpositive/groupby_sort_2.q.out +++ ql/src/test/results/clientpositive/groupby_sort_2.q.out @@ -45,7 +45,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -90,6 +91,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: val, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(val, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -104,6 +120,32 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: val, cnt + 
Column Types: string, int + Table: default.outputtbl1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT val, count(1) FROM T1 GROUP BY val diff --git ql/src/test/results/clientpositive/groupby_sort_3.q.out ql/src/test/results/clientpositive/groupby_sort_3.q.out index 67b53223a8..be9a131014 100644 --- ql/src/test/results/clientpositive/groupby_sort_3.q.out +++ ql/src/test/results/clientpositive/groupby_sort_3.q.out @@ -81,6 +81,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, val, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(val, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -104,6 +130,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, val, cnt + Column Types: string, string, int + Table: default.outputtbl1 Stage: Stage-3 Map Reduce @@ -217,6 +247,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic 
stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -240,6 +296,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, cnt + Column Types: string, int + Table: default.outputtbl2 Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/groupby_sort_4.q.out ql/src/test/results/clientpositive/groupby_sort_4.q.out index 0b269783a3..c2ab1bdb91 100644 --- ql/src/test/results/clientpositive/groupby_sort_4.q.out +++ ql/src/test/results/clientpositive/groupby_sort_4.q.out @@ -45,7 +45,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -90,6 +91,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -104,6 +120,32 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, cnt + Column Types: string, int + Table: default.outputtbl1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM 
T1 GROUP BY key @@ -149,7 +191,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -193,6 +236,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, val, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(val, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -207,6 +265,32 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, val, cnt + Column Types: string, string, int + Table: default.outputtbl2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE outputTbl2 SELECT key, val, count(1) FROM T1 GROUP BY key, val diff --git ql/src/test/results/clientpositive/groupby_sort_5.q.out ql/src/test/results/clientpositive/groupby_sort_5.q.out index 41e1782724..7a7b8df4aa 100644 --- ql/src/test/results/clientpositive/groupby_sort_5.q.out +++ ql/src/test/results/clientpositive/groupby_sort_5.q.out @@ -81,6 +81,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, val, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(val, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + 
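The pattern repeated throughout these .q.out diffs is the same: a Select Operator re-projects the inserted columns under their target names, a map-side Group By Operator computes compute_stats(col, 'hll') partial aggregates in hash mode, and a reduce-side Group By merges them (mergepartial or final) before the Stats Work stage publishes the result. A minimal sketch of how one of these plans can be reproduced follows; the query and the setting come from the output above, while the CREATE statements are assumptions for illustration (the real fixtures live in the corresponding .q files, and column types vary per test).

-- Assumed fixtures; key is STRING in some tests and INT in others.
CREATE TABLE T1 (key STRING, val STRING);
CREATE TABLE outputTbl1 (key STRING, cnt INT);

SET hive.stats.column.autogather=true;  -- the behavior these plan diffs capture

EXPLAIN
INSERT OVERWRITE TABLE outputTbl1
SELECT key, count(1) FROM T1 GROUP BY key;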
Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -104,6 +130,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, val, cnt + Column Types: string, string, int + Table: default.outputtbl1 Stage: Stage-3 Map Reduce @@ -245,6 +275,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, val, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(val, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -268,6 +324,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, val, cnt + Column Types: string, string, int + Table: default.outputtbl1 Stage: Stage-3 Map Reduce @@ -381,7 +441,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -426,6 +487,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -440,6 +516,32 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, cnt + Column Types: string, int + Table: default.outputtbl2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE outputTbl2 SELECT key, count(1) FROM T1 GROUP BY key diff --git ql/src/test/results/clientpositive/groupby_sort_6.q.out ql/src/test/results/clientpositive/groupby_sort_6.q.out index aa3bc96176..871c7b31d9 100644 --- ql/src/test/results/clientpositive/groupby_sort_6.q.out +++ ql/src/test/results/clientpositive/groupby_sort_6.q.out @@ -25,7 +25,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -102,6 +103,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -135,6 +163,80 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: PARTIAL Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern 
was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T1 where ds = '1' GROUP BY key @@ -176,7 +278,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -232,7 +335,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -253,6 +356,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + 
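Stage-3 here is the follow-up MapReduce job: it scans the intermediate sequence file (base file name: -mr-10002) holding the hash-mode partials, merges them in the reduce-side Group By, and hands the final stat structs to the Stats Work stage, whose Column Stats Desc names the columns, types, and table to update in the metastore. The end state should match what a manual column analysis produces; a hedged equivalent, using the table from this hunk:

-- Manual equivalent of what autogather now does as part of the INSERT:
ANALYZE TABLE outputTbl1 COMPUTE STATISTICS FOR COLUMNS;

-- Inspect the persisted per-column stats afterwards:
DESCRIBE FORMATTED outputTbl1 key;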
column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -263,7 +393,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -286,6 +416,80 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: PARTIAL Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T1 where ds = '1' GROUP BY key @@ -318,7 +522,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on 
stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -421,7 +626,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -442,6 +647,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 1 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -452,7 +684,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -475,6 +707,80 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: PARTIAL Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator 
Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T1 where ds = '2' GROUP BY key diff --git ql/src/test/results/clientpositive/groupby_sort_7.q.out ql/src/test/results/clientpositive/groupby_sort_7.q.out index c9746c3126..a9e469b948 100644 --- ql/src/test/results/clientpositive/groupby_sort_7.q.out +++ ql/src/test/results/clientpositive/groupby_sort_7.q.out @@ -84,6 +84,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, val, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(val, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -107,6 +133,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, val, cnt + Column Types: string, string, int + Table: default.outputtbl1 Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out index 3b3e2276ca..f15c0f90ab 100644 --- ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out +++ ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out @@ -106,6 +106,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + 
expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -161,6 +177,35 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -203,6 +248,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true Stage: Stage-3 Map Reduce @@ -429,7 +479,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -625,6 +676,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) + outputColumnNames: key1, key2, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: 
Stage-0 Move Operator @@ -658,6 +736,80 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key1, key2, cnt + Column Types: int, string, int + Table: default.outputtbl2 + Is Table Level Stats: true + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10003 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: INSERT OVERWRITE TABLE outputTbl2 SELECT key, val, count(1) FROM T1 GROUP BY key, val @@ -737,7 +889,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -758,6 +910,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), 
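In the groupby_sort_skew variants the merge-side Group By runs with mode: final rather than mergepartial, but the aggregate is the same internal compute_stats UDAF, and the 'hll' literal selects HyperLogLog as the distinct-value estimator (cf. hive.stats.ndv.algo). compute_stats is an internal function, so the direct call below is shown only to illustrate what each plan computes, and its direct-call signature is an assumption, not a stable API:

-- Illustration only: one stats struct per column, NDV estimated via HLL.
SELECT compute_stats(key, 'hll'), compute_stats(cnt, 'hll') FROM outputTbl1;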
compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -813,6 +981,35 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -832,7 +1029,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -855,6 +1052,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true Stage: Stage-3 Map Reduce @@ -870,7 +1072,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -900,7 +1102,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -921,7 +1123,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -957,7 +1159,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - 
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -987,7 +1189,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1008,7 +1210,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1112,7 +1314,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1133,6 +1335,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1188,6 +1406,35 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -1207,7 +1454,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1230,6 +1477,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true Stage: Stage-3 Map Reduce @@ -1245,7 +1497,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1275,7 +1527,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1296,7 +1548,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1332,7 +1584,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1362,7 +1614,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1383,7 +1635,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1516,6 +1768,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key1, key2, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1571,6 +1839,35 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 
[t1] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -1613,6 +1910,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key1, key2, cnt + Column Types: int, int, int + Table: default.outputtbl3 + Is Table Level Stats: true Stage: Stage-3 Map Reduce @@ -1840,7 +2142,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -2036,6 +2339,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int) + outputColumnNames: key1, key2, key3, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(key3, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -2069,33 +2399,107 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key1, key2, key3, cnt + Column Types: int, int, string, int + Table: default.outputtbl4 + Is Table Level Stats: true -PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4 -SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Output: default@outputtbl4 -POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl4 -SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val -POSTHOOK: type: QUERY -POSTHOOK: Input: 
default@t1 -POSTHOOK: Output: default@outputtbl4 -POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ] -POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] -POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] -POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] -PREHOOK: query: SELECT * FROM outputTbl4 -PREHOOK: type: QUERY -PREHOOK: Input: default@outputtbl4 + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false + Path -> Alias: #### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM outputTbl4 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@outputtbl4 + Path -> Partition: #### A masked pattern was here #### -1 1 11 1 -2 1 12 1 -3 1 13 1 -7 1 17 1 + Partition + base file name: -mr-10003 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + +PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4 +SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Output: default@outputtbl4 +POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl4 +SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: 
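The COLUMN_STATS_ACCURATE rewrites in these hunks are the visible metastore effect of the change: with column autogather on, the table property grows a per-column map, e.g. {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}, recording which columns currently hold trustworthy statistics. A quick way to check it on a live table (the property appears under Table Parameters):

DESCRIBE FORMATTED outputTbl1;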
Output: default@outputtbl4 +POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ] +POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] +POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] +PREHOOK: query: SELECT * FROM outputTbl4 +PREHOOK: type: QUERY +PREHOOK: Input: default@outputtbl4 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM outputTbl4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@outputtbl4 +#### A masked pattern was here #### +1 1 11 1 +2 1 12 1 +3 1 13 1 +7 1 17 1 8 1 18 1 8 1 28 1 PREHOOK: query: EXPLAIN EXTENDED @@ -2110,7 +2514,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -2285,7 +2690,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,cnt @@ -2306,6 +2711,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key1, key2, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -2316,7 +2748,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,cnt @@ -2339,6 +2771,80 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key1, key2, cnt + Column Types: int, int, int + Table: default.outputtbl3 + Is Table Level Stats: true + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> 
Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10003 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: INSERT OVERWRITE TABLE outputTbl3 SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1 @@ -2382,7 +2888,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -2567,7 +3074,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2588,6 +3095,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -2598,7 +3132,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2621,6 +3155,80 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10003 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT cast(key + key as string), sum(cnt) from @@ -2712,7 
+3320,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2733,6 +3341,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false TableScan alias: t1 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE @@ -2764,7 +3388,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2785,6 +3409,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2840,6 +3480,35 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [null-subquery1:$hdt$_0-subquery1:t1, null-subquery2:$hdt$_0-subquery2:t1] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + 
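
The hash/final compute_stats pair that this patch appends is the usual two-phase aggregation: each mapper emits partial statistics structs, and a single reducer merges them into the final column stats. Written out as explicit HiveQL rather than a plan fragment — outputTbl1 is the test's target table here, and the direct compute_stats call is illustrative, not something the test itself runs:

  -- What the appended branch computes, stated by hand. With
  -- hive.stats.column.autogather=true this work happens inside the INSERT.
  ANALYZE TABLE outputTbl1 COMPUTE STATISTICS FOR COLUMNS;

  -- The same aggregation via the UDAF the plan shows; the 'hll' argument
  -- selects the HyperLogLog estimator for the distinct-value count.
  SELECT compute_stats(key, 'hll'), compute_stats(cnt, 'hll') FROM outputTbl1;
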
MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -2859,7 +3528,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2882,6 +3551,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true Stage: Stage-3 Map Reduce @@ -2897,7 +3571,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2927,7 +3601,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2948,7 +3622,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2984,7 +3658,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3014,7 +3688,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3035,7 +3709,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3344,7 +4018,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3365,6 +4039,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + 
aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false TableScan GatherStats: false Union @@ -3384,7 +4074,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3405,6 +4095,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3483,6 +4189,35 @@ STAGE PLANS: Truncated Path -> Alias: /t1 [null-subquery1:$hdt$_0-subquery1:t1] #### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-8 Conditional Operator @@ -3502,7 +4237,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3525,6 +4260,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true Stage: Stage-4 Map Reduce @@ -3540,7 +4280,7 @@ STAGE PLANS: input format: 
org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3570,7 +4310,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3591,7 +4331,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3627,7 +4367,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3657,7 +4397,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3678,7 +4418,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3763,7 +4503,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -3895,7 +4636,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3916,6 +4657,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -3926,7 +4694,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3949,6 +4717,80 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT subq1.key, subq1.cnt+subq2.cnt FROM @@ -4366,7 +5208,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on 
stages: Stage-1 Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -4405,7 +5248,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -4428,7 +5271,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -4542,7 +5385,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -4563,6 +5406,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -4573,7 +5443,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -4589,13 +5459,87 @@ STAGE PLANS: serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 20 #### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - - Stage: Stage-3 - Stats Work - Basic Stats Work: + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 + + Stage: Stage-3 + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 
1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10003 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T2 GROUP BY key @@ -4673,7 +5617,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -4694,6 +5638,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int) + outputColumnNames: key1, key2, key3, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(key3, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 
(type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -4703,7 +5663,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -4726,7 +5686,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -4749,6 +5709,35 @@ STAGE PLANS: name: default.t2 Truncated Path -> Alias: /t2 [t2] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -4768,7 +5757,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -4791,6 +5780,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key1, key2, key3, cnt + Column Types: int, int, string, int + Table: default.outputtbl4 + Is Table Level Stats: true Stage: Stage-3 Map Reduce @@ -4806,7 +5800,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -4836,7 +5830,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -4857,7 +5851,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -4893,7 +5887,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -4923,7 +5917,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -4944,7 +5938,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5080,6 +6074,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key1, key2, key3, key4, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(key3, 'hll'), compute_stats(key4, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2136 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 2136 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -5089,7 +6099,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -5112,7 +6122,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} 
SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -5135,6 +6145,35 @@ STAGE PLANS: name: default.t2 Truncated Path -> Alias: /t2 [t2] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4 + columns.types struct:struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -5177,6 +6216,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key1, key2, key3, key4, cnt + Column Types: int, int, string, int, int + Table: default.outputtbl5 + Is Table Level Stats: true Stage: Stage-3 Map Reduce @@ -5444,7 +6488,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5465,6 +6509,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int) + outputColumnNames: key1, key2, key3, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(key3, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -5474,7 +6534,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -5497,7 +6557,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -5520,6 +6580,35 @@ STAGE PLANS: name: default.t2 Truncated Path -> Alias: /t2 [t2] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -5539,7 +6628,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5562,6 +6651,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key1, key2, key3, cnt + Column Types: int, int, string, int + Table: default.outputtbl4 + Is Table Level Stats: true Stage: Stage-3 Map Reduce @@ -5577,7 +6671,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5607,7 +6701,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5628,7 +6722,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5664,7 +6758,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output 
format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5694,7 +6788,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5715,7 +6809,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5836,7 +6930,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5857,6 +6951,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int) + outputColumnNames: key1, key2, key3, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(key3, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -5866,7 +6976,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -5889,7 +6999,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -5912,6 +7022,35 @@ STAGE PLANS: name: default.t2 Truncated Path -> Alias: /t2 [t2] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: 
final + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -5931,7 +7070,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5954,6 +7093,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key1, key2, key3, cnt + Column Types: int, int, string, int + Table: default.outputtbl4 + Is Table Level Stats: true Stage: Stage-3 Map Reduce @@ -5969,7 +7113,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5999,7 +7143,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -6020,7 +7164,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -6056,7 +7200,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -6086,7 +7230,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -6107,7 +7251,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -6203,9 +7347,11 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-0, Stage-5, Stage-7 + Stage-5 depends on stages: Stage-3 + Stage-6 depends on stages: Stage-1, Stage-5, Stage-7 Stage-1 depends on stages: Stage-2 - Stage-5 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -6253,6 +7399,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, val, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(val, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -6296,6 +7457,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -6310,6 +7486,40 @@ STAGE PLANS: Stage: Stage-4 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest1 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File 
Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val, cnt + Column Types: int, string, int + Table: default.dest2 Stage: Stage-1 Move Operator @@ -6321,9 +7531,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-5 - Stats Work - Basic Stats Work: + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM T2 INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key @@ -6385,9 +7613,11 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-0, Stage-5, Stage-7 + Stage-5 depends on stages: Stage-3 + Stage-6 depends on stages: Stage-1, Stage-5, Stage-7 Stage-1 depends on stages: Stage-2 - Stage-5 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -6438,6 +7668,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, val, cnt + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(val, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -6481,6 +7726,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + 
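
This EXPLAIN is the multi-insert case: every INSERT destination gets its own compute_stats branch, its own merge stage, and its own Column Stats Desc under Stats Work, which is why the stage graph grows by one map-reduce stage plus one Stats Work per target. The shape of the statement under test, reconstructed from the surrounding PREHOOK lines and the dest2 Column Stats Desc (key, val, cnt) — the DEST2 branch may differ in detail from the .q file:

  -- Multi-insert from one scan of T2; with autogather each destination
  -- (dest1, dest2) gets its own column-stats aggregation.
  FROM T2
  INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key
  INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val;
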
outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -6495,6 +7755,40 @@ STAGE PLANS: Stage: Stage-4 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest1 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val, cnt + Column Types: int, string, int + Table: default.dest2 Stage: Stage-1 Move Operator @@ -6506,9 +7800,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-5 - Stats Work - Basic Stats Work: + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM (select key, val from T2 where key = 8) x INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key diff --git ql/src/test/results/clientpositive/groupby_sort_test_1.q.out ql/src/test/results/clientpositive/groupby_sort_test_1.q.out index 1723e75467..ffe6515bbd 100644 --- ql/src/test/results/clientpositive/groupby_sort_test_1.q.out +++ ql/src/test/results/clientpositive/groupby_sort_test_1.q.out @@ -81,6 +81,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, 
_col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -104,6 +130,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/implicit_cast_during_insert.q.out ql/src/test/results/clientpositive/implicit_cast_during_insert.q.out index 1fa9e94aa7..a515847ed4 100644 --- ql/src/test/results/clientpositive/implicit_cast_during_insert.q.out +++ ql/src/test/results/clientpositive/implicit_cast_during_insert.q.out @@ -56,6 +56,27 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.implicit_cast_during_insert + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: c1, c2, p1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll') + keys: p1 (type: string) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -72,6 +93,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: c1, c2 + Column Types: int, string + Table: default.implicit_cast_during_insert PREHOOK: query: insert overwrite table implicit_cast_during_insert partition (p1) select key, value, key key1 from (select * from src where key in (0,1)) q diff --git ql/src/test/results/clientpositive/index_auto_update.q.out ql/src/test/results/clientpositive/index_auto_update.q.out index bc444cc9fb..e48b657434 100644 --- ql/src/test/results/clientpositive/index_auto_update.q.out +++ ql/src/test/results/clientpositive/index_auto_update.q.out @@ -45,10 +45,10 @@ STAGE DEPENDENCIES: Stage-0 depends on stages: Stage-9, Stage-8, Stage-11 Stage-2 depends on stages: Stage-0 Stage-1 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-1, Stage-4, 
Stage-5 Stage-4 depends on stages: Stage-2 + Stage-7 depends on stages: Stage-0, Stage-4, Stage-5 Stage-5 depends on stages: Stage-2 - Stage-7 depends on stages: Stage-0 Stage-8 Stage-10 Stage-11 depends on stages: Stage-10 @@ -72,6 +72,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.temp + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, val + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(val, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-12 Conditional Operator @@ -140,11 +166,15 @@ STAGE PLANS: Stage: Stage-4 - Stage: Stage-5 - Stage: Stage-7 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, val + Column Types: string, string + Table: default.temp + + Stage: Stage-5 Stage: Stage-8 Map Reduce diff --git ql/src/test/results/clientpositive/infer_bucket_sort.q.out ql/src/test/results/clientpositive/infer_bucket_sort.q.out index 8ed105405c..fee672b3e3 100644 --- ql/src/test/results/clientpositive/infer_bucket_sort.q.out +++ ql/src/test/results/clientpositive/infer_bucket_sort.q.out @@ -38,7 +38,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1482 @@ -87,7 +87,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1482 @@ -136,7 +136,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 1028 rawDataSize 10968 @@ -185,7 +185,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 1028 rawDataSize 10968 @@ -234,7 +234,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE 
{\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 1028 rawDataSize 4970 @@ -283,7 +283,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 2654 rawDataSize 28466 @@ -332,7 +332,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 2654 rawDataSize 28466 @@ -381,7 +381,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -430,7 +430,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -479,7 +479,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -528,7 +528,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -577,7 +577,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -626,7 +626,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 1028 rawDataSize 10968 @@ -675,7 +675,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 1028 rawDataSize 10968 @@ -724,7 +724,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1482 @@ -773,7 +773,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE 
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1482 @@ -822,7 +822,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 6 rawDataSize 18 @@ -871,7 +871,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 6 rawDataSize 18 @@ -920,7 +920,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 618 rawDataSize 2964 @@ -969,7 +969,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 5 rawDataSize 19 @@ -1018,7 +1018,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1482 @@ -1067,7 +1067,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1482 @@ -1116,7 +1116,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1482 @@ -1165,7 +1165,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1482 @@ -1214,7 +1214,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 3582 diff --git ql/src/test/results/clientpositive/infer_bucket_sort_dyn_part.q.out ql/src/test/results/clientpositive/infer_bucket_sort_dyn_part.q.out index cba50a758d..32e4513306 100644 --- ql/src/test/results/clientpositive/infer_bucket_sort_dyn_part.q.out +++ ql/src/test/results/clientpositive/infer_bucket_sort_dyn_part.q.out @@ -56,7 +56,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 4812 
@@ -94,7 +94,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 4812 @@ -155,7 +155,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1173 @@ -193,7 +193,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1173 @@ -256,7 +256,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 155 rawDataSize 586 @@ -294,7 +294,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 154 rawDataSize 591 @@ -415,10 +415,11 @@ STAGE DEPENDENCIES: Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-8 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 + Stage-8 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -462,6 +463,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.test_table + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-7 Conditional Operator @@ -488,6 +505,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.test_table Stage: Stage-3 Merge File Operator @@ -509,6 +530,35 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE 
Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (ds = '2008-04-08', hr) SELECT key, value, IF (key % 100 == 0, '11', '12') FROM (SELECT key, COUNT(*) AS value FROM srcpart @@ -555,7 +605,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 4 rawDataSize 14 @@ -593,7 +643,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 305 rawDataSize 1163 diff --git ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out index 8ae732096a..5f1d2647fe 100644 --- ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out +++ ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out @@ -23,7 +23,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -68,6 +69,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table_out + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), '1' (type: string) + outputColumnNames: key, value, agg, part + Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(agg, 'hll') + keys: part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -84,6 +101,39 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + 
Columns: key, value, agg + Column Types: string, string, string + Table: default.test_table_out + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 375 Data size: 3984 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 375 Data size: 3984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 375 Data size: 3984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: SELECT key, value, count(1) FROM src GROUP BY ROLLUP (key, value) PREHOOK: type: QUERY @@ -746,7 +796,7 @@ Database: default Table: test_table_out #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"agg\":\"true\",\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 619 rawDataSize 6309 @@ -1426,7 +1476,7 @@ Database: default Table: test_table_out_2 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"agg\":\"true\",\"grouping_key\":\"true\",\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 619 rawDataSize 7547 @@ -1452,7 +1502,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -1497,6 +1548,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table_out + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), '1' (type: string) + outputColumnNames: key, value, agg, part + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(agg, 'hll') + keys: part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -1513,6 +1580,39 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats 
Work: + Column Stats Desc: + Columns: key, value, agg + Column Types: string, string, string + Table: default.test_table_out + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') SELECT key, value, count(1) FROM src GROUP BY key, value WITH CUBE @@ -1548,7 +1648,7 @@ Database: default Table: test_table_out #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"agg\":\"true\",\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 928 rawDataSize 9954 @@ -1601,7 +1701,7 @@ Database: default Table: test_table_out_2 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"agg\":\"true\",\"grouping_key\":\"true\",\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 928 rawDataSize 11810 @@ -1627,7 +1727,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -1672,6 +1773,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table_out + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), '1' (type: string) + outputColumnNames: key, value, agg, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(agg, 'hll') + keys: part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ 
-1688,6 +1805,39 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value, agg + Column Types: string, string, string + Table: default.test_table_out + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') SELECT key, value, count(1) FROM src GROUP BY key, value GROUPING SETS (key, value) @@ -1723,7 +1873,7 @@ Database: default Table: test_table_out #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"agg\":\"true\",\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 618 rawDataSize 6054 @@ -1776,7 +1926,7 @@ Database: default Table: test_table_out_2 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"agg\":\"true\",\"grouping_key\":\"true\",\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 618 rawDataSize 7290 diff --git ql/src/test/results/clientpositive/infer_bucket_sort_list_bucket.q.out ql/src/test/results/clientpositive/infer_bucket_sort_list_bucket.q.out index 98c062bd03..c701de9347 100644 --- ql/src/test/results/clientpositive/infer_bucket_sort_list_bucket.q.out +++ ql/src/test/results/clientpositive/infer_bucket_sort_list_bucket.q.out @@ -44,7 +44,7 @@ Database: default Table: list_bucketing_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 309 rawDataSize 1482 @@ -112,7 +112,7 @@ Database: default Table: list_bucketing_table2 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1482 diff --git ql/src/test/results/clientpositive/infer_bucket_sort_map_operators.q.out ql/src/test/results/clientpositive/infer_bucket_sort_map_operators.q.out index f2218e3697..0711eb5fcd 100644 --- 
ql/src/test/results/clientpositive/infer_bucket_sort_map_operators.q.out +++ ql/src/test/results/clientpositive/infer_bucket_sort_map_operators.q.out @@ -91,6 +91,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table_out + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, part + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -116,6 +150,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.test_table_out Stage: Stage-3 Map Reduce @@ -179,7 +217,7 @@ Database: default Table: test_table_out #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1482 @@ -216,7 +254,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -280,6 +319,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table_out + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, part + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -296,6 +351,39 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.test_table_out + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') SELECT a.key, a.value FROM ( @@ -341,7 +429,7 @@ Database: default Table: test_table_out #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 0 rawDataSize 0 @@ -401,6 +489,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table_out + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: '1' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: '1' (type: string) + sort order: + + Map-reduce partition columns: '1' (type: string) + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: '1' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), '1' (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -426,6 +542,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.test_table_out Stage: Stage-4 Map Reduce @@ -491,7 +611,7 @@ 
Database: default Table: test_table_out #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 1028 rawDataSize 10968 @@ -519,7 +639,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4 + Stage-4 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -564,6 +685,20 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table_out + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: '1' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -580,6 +715,35 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.test_table_out + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: '1' (type: string) + sort order: + + Map-reduce partition columns: '1' (type: string) + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: '1' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), '1' (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') SELECT /*+ MAPJOIN(a) */ b.value, count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key @@ -617,7 +781,7 @@ Database: default Table: test_table_out #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 2728 diff --git ql/src/test/results/clientpositive/infer_bucket_sort_merge.q.out ql/src/test/results/clientpositive/infer_bucket_sort_merge.q.out index 44a9435b92..694d15c4e9 100644 --- ql/src/test/results/clientpositive/infer_bucket_sort_merge.q.out +++ ql/src/test/results/clientpositive/infer_bucket_sort_merge.q.out @@ -38,7 +38,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 1028 rawDataSize 10968 @@ -87,7 +87,7 @@ Database: default Table: test_table #### A masked 
pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 1028 rawDataSize 10968 diff --git ql/src/test/results/clientpositive/infer_bucket_sort_multi_insert.q.out ql/src/test/results/clientpositive/infer_bucket_sort_multi_insert.q.out index d915c30806..a10c06eba6 100644 --- ql/src/test/results/clientpositive/infer_bucket_sort_multi_insert.q.out +++ ql/src/test/results/clientpositive/infer_bucket_sort_multi_insert.q.out @@ -81,7 +81,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -173,7 +173,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 2718 @@ -265,7 +265,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -357,7 +357,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 2690 diff --git ql/src/test/results/clientpositive/infer_bucket_sort_num_buckets.q.out ql/src/test/results/clientpositive/infer_bucket_sort_num_buckets.q.out index 5c7659b7ba..cf332e2301 100644 --- ql/src/test/results/clientpositive/infer_bucket_sort_num_buckets.q.out +++ ql/src/test/results/clientpositive/infer_bucket_sort_num_buckets.q.out @@ -25,7 +25,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -56,6 +57,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table + Select Operator + expressions: _col0 (type: int), _col1 (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, hr + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: '2008-04-08' (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -73,6 +90,39 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + 
Column Types: int, string + Table: default.test_table + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: '2008-04-08' (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: '2008-04-08' (type: string), _col1 (type: string) + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: '2008-04-08' (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), '2008-04-08' (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (ds = '2008-04-08', hr) SELECT key2, value, cast(hr as int) FROM @@ -122,7 +172,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 670 rawDataSize 5888 @@ -160,7 +210,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 330 rawDataSize 2924 diff --git ql/src/test/results/clientpositive/innerjoin.q.out ql/src/test/results/clientpositive/innerjoin.q.out index 741c9fbbd4..138eb9390c 100644 --- ql/src/test/results/clientpositive/innerjoin.q.out +++ ql/src/test/results/clientpositive/innerjoin.q.out @@ -17,7 +17,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -75,6 +76,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -89,6 
+105,32 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest_j1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src src1 INNER JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value diff --git ql/src/test/results/clientpositive/input11.q.out ql/src/test/results/clientpositive/input11.q.out index 356cef120a..bb59b4f992 100644 --- ql/src/test/results/clientpositive/input11.q.out +++ ql/src/test/results/clientpositive/input11.q.out @@ -46,6 +46,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -69,6 +95,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/input12.q.out ql/src/test/results/clientpositive/input12.q.out index 8bb52c0cd1..8096c71411 100644 --- ql/src/test/results/clientpositive/input12.q.out +++ ql/src/test/results/clientpositive/input12.q.out @@ -39,24 +39,16 @@ STAGE DEPENDENCIES: Stage-9 depends on stages: Stage-3 , consists of Stage-6, Stage-5, Stage-7 Stage-6 Stage-0 depends on stages: Stage-6, Stage-5, Stage-8 - Stage-4 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-0, Stage-11, Stage-13 Stage-5 Stage-7 Stage-8 depends on stages: Stage-7 
- Stage-15 depends on stages: Stage-3 , consists of Stage-12, Stage-11, Stage-13 - Stage-12 - Stage-1 depends on stages: Stage-12, Stage-11, Stage-14 - Stage-10 depends on stages: Stage-1 - Stage-11 - Stage-13 - Stage-14 depends on stages: Stage-13 - Stage-21 depends on stages: Stage-3 , consists of Stage-18, Stage-17, Stage-19 - Stage-18 - Stage-2 depends on stages: Stage-18, Stage-17, Stage-20 - Stage-16 depends on stages: Stage-2 - Stage-17 - Stage-19 - Stage-20 depends on stages: Stage-19 + Stage-1 depends on stages: Stage-3 + Stage-10 depends on stages: Stage-1, Stage-11, Stage-13 + Stage-11 depends on stages: Stage-3 + Stage-12 depends on stages: Stage-2, Stage-11, Stage-13 + Stage-2 depends on stages: Stage-3 + Stage-13 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-3 @@ -80,6 +72,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key < 200) and (key >= 100)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -95,6 +100,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (key >= 200) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE @@ -110,6 +130,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest3 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll') + keys: '2008-04-08' (type: string), '12' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-9 Conditional Operator @@ -133,6 +182,10 @@ STAGE PLANS: Stage: Stage-4 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 Stage: Stage-5 Map Reduce @@ -164,15 +217,6 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-15 - Conditional Operator - - Stage: Stage-12 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-1 Move Operator tables: @@ -186,45 +230,40 @@ STAGE PLANS: Stage: Stage-10 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 Stage: Stage-11 Map Reduce Map Operator Tree: TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - - Stage: Stage-13 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - - Stage: Stage-14 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-21 - Conditional Operator + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-18 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### + Stage: Stage-12 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key + Column Types: int + Table: default.dest3 Stage: Stage-2 Move Operator @@ -239,39 +278,34 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest3 - Stage: Stage-16 - Stats Work - Basic Stats Work: - - Stage: Stage-17 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest3 - - Stage: Stage-19 + Stage: Stage-13 Map Reduce Map Operator Tree: TableScan + Reduce Output Operator + key expressions: '2008-04-08' (type: string), '12' (type: string) + sort order: ++ + Map-reduce partition columns: '2008-04-08' (type: string), '12' (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: '2008-04-08' (type: string), '12' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), '2008-04-08' (type: string), '12' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest3 - - Stage: Stage-20 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.* WHERE src.key < 100 diff --git ql/src/test/results/clientpositive/input13.q.out ql/src/test/results/clientpositive/input13.q.out index 9b1970b7ab..c64a4d56b4 100644 --- ql/src/test/results/clientpositive/input13.q.out +++ ql/src/test/results/clientpositive/input13.q.out @@ -41,30 +41,17 @@ STAGE DEPENDENCIES: Stage-10 depends on stages: Stage-4 , consists of Stage-7, Stage-6, Stage-8 Stage-7 Stage-0 depends on stages: Stage-7, Stage-6, Stage-9 - Stage-5 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-0, Stage-12, Stage-14, Stage-3 Stage-6 Stage-8 Stage-9 depends on stages: Stage-8 - Stage-16 depends on stages: Stage-4 , consists of Stage-13, Stage-12, Stage-14 - Stage-13 - Stage-1 depends on stages: Stage-13, Stage-12, Stage-15 - Stage-11 depends on stages: Stage-1 - Stage-12 - Stage-14 - Stage-15 depends on stages: Stage-14 - Stage-22 depends on stages: Stage-4 , consists of Stage-19, Stage-18, Stage-20 - Stage-19 - Stage-2 depends on stages: Stage-19, Stage-18, Stage-21 - Stage-17 depends on stages: Stage-2 - Stage-18 - Stage-20 - Stage-21 depends on stages: Stage-20 - Stage-27 depends on stages: Stage-4 , consists of Stage-24, Stage-23, Stage-25 - Stage-24 - Stage-3 depends on stages: Stage-24, Stage-23, Stage-26 - Stage-23 - Stage-25 - Stage-26 depends on stages: Stage-25 + Stage-1 depends on stages: Stage-4 + Stage-11 depends on stages: Stage-1, Stage-12, Stage-14, Stage-3 + Stage-12 depends on stages: Stage-4 + Stage-13 depends on stages: Stage-2, Stage-12, Stage-14, Stage-3 + Stage-2 depends on stages: Stage-4 + Stage-14 depends on stages: Stage-4 + Stage-3 depends on stages: Stage-4 STAGE PLANS: Stage: Stage-4 @@ -88,6 +75,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + 
diff --git ql/src/test/results/clientpositive/input13.q.out ql/src/test/results/clientpositive/input13.q.out
index 9b1970b7ab..c64a4d56b4 100644
--- ql/src/test/results/clientpositive/input13.q.out
+++ ql/src/test/results/clientpositive/input13.q.out
@@ -41,30 +41,17 @@ STAGE DEPENDENCIES:
   Stage-10 depends on stages: Stage-4 , consists of Stage-7, Stage-6, Stage-8
   Stage-7
   Stage-0 depends on stages: Stage-7, Stage-6, Stage-9
-  Stage-5 depends on stages: Stage-0
+  Stage-5 depends on stages: Stage-0, Stage-12, Stage-14, Stage-3
   Stage-6
   Stage-8
   Stage-9 depends on stages: Stage-8
-  Stage-16 depends on stages: Stage-4 , consists of Stage-13, Stage-12, Stage-14
-  Stage-13
-  Stage-1 depends on stages: Stage-13, Stage-12, Stage-15
-  Stage-11 depends on stages: Stage-1
-  Stage-12
-  Stage-14
-  Stage-15 depends on stages: Stage-14
-  Stage-22 depends on stages: Stage-4 , consists of Stage-19, Stage-18, Stage-20
-  Stage-19
-  Stage-2 depends on stages: Stage-19, Stage-18, Stage-21
-  Stage-17 depends on stages: Stage-2
-  Stage-18
-  Stage-20
-  Stage-21 depends on stages: Stage-20
-  Stage-27 depends on stages: Stage-4 , consists of Stage-24, Stage-23, Stage-25
-  Stage-24
-  Stage-3 depends on stages: Stage-24, Stage-23, Stage-26
-  Stage-23
-  Stage-25
-  Stage-26 depends on stages: Stage-25
+  Stage-1 depends on stages: Stage-4
+  Stage-11 depends on stages: Stage-1, Stage-12, Stage-14, Stage-3
+  Stage-12 depends on stages: Stage-4
+  Stage-13 depends on stages: Stage-2, Stage-12, Stage-14, Stage-3
+  Stage-2 depends on stages: Stage-4
+  Stage-14 depends on stages: Stage-4
+  Stage-3 depends on stages: Stage-4
 
 STAGE PLANS:
   Stage: Stage-4
@@ -88,6 +75,19 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.dest1
+            Select Operator
+              expressions: _col0 (type: int), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order:
+                  Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: struct), _col1 (type: struct)
           Filter Operator
             predicate: ((key < 200) and (key >= 100)) (type: boolean)
             Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
@@ -103,6 +103,21 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.dest2
+            Select Operator
+              expressions: _col0 (type: int), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
           Filter Operator
             predicate: ((key < 300) and (key >= 200)) (type: boolean)
             Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
@@ -118,6 +133,22 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.dest3
+            Select Operator
+              expressions: _col0 (type: int)
+              outputColumnNames: key
+              Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll')
+                keys: '2008-04-08' (type: string), '12' (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
           Filter Operator
             predicate: (key >= 300) (type: boolean)
             Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
@@ -132,6 +163,19 @@ STAGE PLANS:
                   input format: org.apache.hadoop.mapred.TextInputFormat
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-10
     Conditional Operator
 
@@ -155,6 +199,10 @@ STAGE PLANS:
   Stage: Stage-5
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest1
 
   Stage: Stage-6
     Map Reduce
@@ -186,15 +234,6 @@ STAGE PLANS:
           hdfs directory: true
 #### A masked pattern was here ####
 
-  Stage: Stage-16
-    Conditional Operator
-
-  Stage: Stage-13
-    Move Operator
-      files:
-          hdfs directory: true
-#### A masked pattern was here ####
-
   Stage: Stage-1
     Move Operator
       tables:
@@ -208,45 +247,40 @@ STAGE PLANS:
   Stage: Stage-11
     Stats Work
      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest2
 
   Stage: Stage-12
     Map Reduce
       Map Operator Tree:
           TableScan
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                  name: default.dest2
-
-  Stage: Stage-14
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                  name: default.dest2
-
-  Stage: Stage-15
-    Move Operator
-      files:
-          hdfs directory: true
-#### A masked pattern was here ####
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-  Stage: Stage-22
-    Conditional Operator
-
-  Stage: Stage-19
-    Move Operator
-      files:
-          hdfs directory: true
-#### A masked pattern was here ####
+  Stage: Stage-13
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key
+          Column Types: int
+          Table: default.dest3
 
   Stage: Stage-2
     Move Operator
@@ -261,48 +295,34 @@ STAGE PLANS:
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.dest3
-  Stage: Stage-17
-    Stats Work
-      Basic Stats Work:
-
-  Stage: Stage-18
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                  name: default.dest3
-
-  Stage: Stage-20
+  Stage: Stage-14
     Map Reduce
       Map Operator Tree:
          TableScan
+            Reduce Output Operator
+              key expressions: '2008-04-08' (type: string), '12' (type: string)
+              sort order: ++
+              Map-reduce partition columns: '2008-04-08' (type: string), '12' (type: string)
+              Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col2 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0)
+          keys: '2008-04-08' (type: string), '12' (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col2 (type: struct), '2008-04-08' (type: string), '12' (type: string)
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
             File Output Operator
               compressed: false
+              Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
               table:
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                  name: default.dest3
-
-  Stage: Stage-21
-    Move Operator
-      files:
-          hdfs directory: true
-#### A masked pattern was here ####
-
-  Stage: Stage-27
-    Conditional Operator
-
-  Stage: Stage-24
-    Move Operator
-      files:
-          hdfs directory: true
-#### A masked pattern was here ####
 
   Stage: Stage-3
     Move Operator
@@ -310,34 +330,6 @@ STAGE PLANS:
           hdfs directory: true
           destination: target/warehouse/dest4.out
 
-  Stage: Stage-23
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-25
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-26
-    Move Operator
-      files:
-          hdfs directory: true
-#### A masked pattern was here ####
-
 PREHOOK: query: FROM src
 INSERT OVERWRITE TABLE dest1 SELECT src.* WHERE src.key < 100
 INSERT OVERWRITE TABLE dest2 SELECT src.key, src.value WHERE src.key >= 100 and src.key < 200
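For the partitioned target (dest3 here), the gathered statistics are keyed by the static partition values: compute_stats runs grouped on '2008-04-08' and '12', the follow-up stage merges per partition in mergepartial mode, and the Stats Work stage records column stats per partition rather than per table. Assuming dest3 is partitioned by (ds, hr) as in this test, the recorded values could later be inspected with something like the hedged sketch below:

    -- per-partition column stats for one column (illustrative)
    DESCRIBE FORMATTED default.dest3 PARTITION (ds='2008-04-08', hr='12') key;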
diff --git ql/src/test/results/clientpositive/input14.q.out ql/src/test/results/clientpositive/input14.q.out
index d8455267eb..516c68467c 100644
--- ql/src/test/results/clientpositive/input14.q.out
+++ ql/src/test/results/clientpositive/input14.q.out
@@ -27,7 +27,8 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-1
@@ -69,6 +70,21 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.dest1
+            Select Operator
+              expressions: _col0 (type: int), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -83,6 +99,32 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest1
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: FROM (
   FROM src
diff --git ql/src/test/results/clientpositive/input17.q.out ql/src/test/results/clientpositive/input17.q.out
index 316c9c6c6b..f1ba03abb9 100644
--- ql/src/test/results/clientpositive/input17.q.out
+++ ql/src/test/results/clientpositive/input17.q.out
@@ -27,7 +27,8 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-1
@@ -66,6 +67,21 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.dest1
+            Select Operator
+              expressions: _col0 (type: int), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 11 Data size: 3070 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -80,6 +96,32 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest1
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: FROM (
   FROM src_thrift
diff --git ql/src/test/results/clientpositive/input18.q.out ql/src/test/results/clientpositive/input18.q.out
index 4eb75ff55e..1be623ed9d 100644
--- ql/src/test/results/clientpositive/input18.q.out
+++ ql/src/test/results/clientpositive/input18.q.out
@@ -27,7 +27,8 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-1
@@ -69,6 +70,21 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.dest1
+            Select Operator
+              expressions: _col0 (type: int), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -83,6 +99,32 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest1
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: FROM (
   FROM src
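The recurring two-stage shape in these plans is classic partial aggregation: the map side runs compute_stats(..., 'hll') in hash mode and writes partial stats structs to an intermediate SequenceFile, then a dedicated follow-up Map Reduce stage merges them in mergepartial mode before the Stats Work stage persists the result to the metastore. What the autogathered stages compute is roughly what an explicit column-stats scan would; a hedged manual equivalent for the dest1 fixture:

    -- manual equivalent of the autogathered column stats
    ANALYZE TABLE default.dest1 COMPUTE STATISTICS FOR COLUMNS key, value;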
diff --git ql/src/test/results/clientpositive/input1_limit.q.out ql/src/test/results/clientpositive/input1_limit.q.out
index 649e8fb94b..1e898ea535 100644
--- ql/src/test/results/clientpositive/input1_limit.q.out
+++ ql/src/test/results/clientpositive/input1_limit.q.out
@@ -27,10 +27,12 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-0 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-0, Stage-4, Stage-7
   Stage-4 depends on stages: Stage-2
-  Stage-1 depends on stages: Stage-4
-  Stage-5 depends on stages: Stage-1
+  Stage-6 depends on stages: Stage-1, Stage-4, Stage-7
+  Stage-5 depends on stages: Stage-2
+  Stage-1 depends on stages: Stage-5
+  Stage-7 depends on stages: Stage-5
 
 STAGE PLANS:
   Stage: Stage-2
@@ -52,7 +54,6 @@ STAGE PLANS:
             Reduce Output Operator
               sort order:
               Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
-              TopN Hash Memory Usage: 0.1
               value expressions: _col0 (type: string), _col1 (type: string)
           Filter Operator
             predicate: (key < 100) (type: boolean)
@@ -90,6 +91,21 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.dest1
+            Select Operator
+              expressions: _col0 (type: int), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -104,6 +120,10 @@ STAGE PLANS:
   Stage: Stage-3
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest1
 
   Stage: Stage-4
     Map Reduce
@@ -111,8 +131,37 @@ STAGE PLANS:
           TableScan
             Reduce Output Operator
               sort order:
+              Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-6
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest2
+
+  Stage: Stage-5
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
               Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE
-              TopN Hash Memory Usage: 0.1
               value expressions: _col0 (type: string), _col1 (type: string)
       Reduce Operator Tree:
         Select Operator
@@ -134,6 +183,21 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.dest2
+            Select Operator
+              expressions: _col0 (type: int), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-1
     Move Operator
@@ -145,9 +209,27 @@ STAGE PLANS:
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.dest2
 
-  Stage: Stage-5
-    Stats Work
-      Basic Stats Work:
+  Stage: Stage-7
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: FROM src
 INSERT OVERWRITE TABLE dest1 SELECT src.key, src.value WHERE src.key < 100 LIMIT 10
diff --git ql/src/test/results/clientpositive/input20.q.out ql/src/test/results/clientpositive/input20.q.out
index 051e94ecf3..b931f9b368 100644
--- ql/src/test/results/clientpositive/input20.q.out
+++ ql/src/test/results/clientpositive/input20.q.out
@@ -33,7 +33,8 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-1
@@ -82,6 +83,21 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.dest1
+            Select Operator
+              expressions: _col0 (type: int), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -96,6 +112,32 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest1
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: FROM (
   FROM src
diff --git ql/src/test/results/clientpositive/input30.q.out ql/src/test/results/clientpositive/input30.q.out
index 90c9732117..84e4ae4786 100644
--- ql/src/test/results/clientpositive/input30.q.out
+++ ql/src/test/results/clientpositive/input30.q.out
@@ -64,6 +64,26 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.tst_dest30
+            Select Operator
+              expressions: _col0 (type: int)
+              outputColumnNames: a
+              Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+              Group By Operator
+                aggregations: compute_stats(a, 'hll')
+                mode: complete
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col0 (type: struct)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Move Operator
@@ -78,6 +98,10 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: a
+          Column Types: int
+          Table: default.tst_dest30
 
 PREHOOK: query: insert overwrite table dest30 select count(1) from src
@@ -148,6 +172,26 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.dest30
+            Select Operator
+              expressions: _col0 (type: int)
+              outputColumnNames: a
+              Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+              Group By Operator
+                aggregations: compute_stats(a, 'hll')
+                mode: complete
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col0 (type: struct)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Move Operator
@@ -162,6 +206,10 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: a
+          Column Types: int
+          Table: default.dest30
 
 PREHOOK: query: insert overwrite table dest30 select count(1) from src
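input30 shows the degenerate case: the insert already ends in a single reducer (a global count(1)), so compute_stats can run there in complete mode and no extra merge stage is needed; only the Column Stats Desc lines are added to the existing Stats Work stage. This should be reproducible with the test's own query; a hedged sketch:

    SET hive.stats.column.autogather=true;
    EXPLAIN insert overwrite table dest30 select count(1) from src;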
diff --git ql/src/test/results/clientpositive/input31.q.out ql/src/test/results/clientpositive/input31.q.out
index d3c2c6af09..3ba7c5f87f 100644
--- ql/src/test/results/clientpositive/input31.q.out
+++ ql/src/test/results/clientpositive/input31.q.out
@@ -66,6 +66,26 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.tst_dest31
+            Select Operator
+              expressions: _col0 (type: int)
+              outputColumnNames: a
+              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(a, 'hll')
+                mode: complete
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: struct)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Move Operator
@@ -80,6 +100,10 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: a
+          Column Types: int
+          Table: default.tst_dest31
 
 PREHOOK: query: insert overwrite table dest31 select count(1) from srcbucket
diff --git ql/src/test/results/clientpositive/input32.q.out ql/src/test/results/clientpositive/input32.q.out
index fa3b731a78..4da8c75e2c 100644
--- ql/src/test/results/clientpositive/input32.q.out
+++ ql/src/test/results/clientpositive/input32.q.out
@@ -63,6 +63,26 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.tst_dest32
+            Select Operator
+              expressions: _col0 (type: int)
+              outputColumnNames: a
+              Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+              Group By Operator
+                aggregations: compute_stats(a, 'hll')
+                mode: complete
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col0 (type: struct)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Move Operator
@@ -77,6 +97,10 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: a
+          Column Types: int
+          Table: default.tst_dest32
 
 PREHOOK: query: insert overwrite table dest32 select count(1) from srcbucket
diff --git ql/src/test/results/clientpositive/input33.q.out ql/src/test/results/clientpositive/input33.q.out
index c24ac6e41e..fe05db2c2c 100644
--- ql/src/test/results/clientpositive/input33.q.out
+++ ql/src/test/results/clientpositive/input33.q.out
@@ -33,7 +33,8 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-1
@@ -82,6 +83,21 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.dest1
+            Select Operator
+              expressions: _col0 (type: int), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -96,6 +112,32 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest1
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: FROM (
   FROM src
diff --git ql/src/test/results/clientpositive/input34.q.out ql/src/test/results/clientpositive/input34.q.out
index 841f0af16f..b6e0c054dd 100644
--- ql/src/test/results/clientpositive/input34.q.out
+++ ql/src/test/results/clientpositive/input34.q.out
@@ -64,6 +64,32 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.dest1
+            Select Operator
+              expressions: _col0 (type: int), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order:
+                  Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-7
     Conditional Operator
@@ -87,6 +113,10 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest1
 
   Stage: Stage-3
     Map Reduce
diff --git ql/src/test/results/clientpositive/input35.q.out ql/src/test/results/clientpositive/input35.q.out
index 76a014eed9..7e89fb477f 100644
--- ql/src/test/results/clientpositive/input35.q.out
+++ ql/src/test/results/clientpositive/input35.q.out
@@ -64,6 +64,32 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.dest1
+            Select Operator
+              expressions: _col0 (type: int), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order:
+                  Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-7
     Conditional Operator
@@ -87,6 +113,10 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest1
 
   Stage: Stage-3
     Map Reduce
diff --git ql/src/test/results/clientpositive/input36.q.out ql/src/test/results/clientpositive/input36.q.out
index f75f8fda5a..64e34cc539 100644
--- ql/src/test/results/clientpositive/input36.q.out
+++ ql/src/test/results/clientpositive/input36.q.out
@@ -64,6 +64,32 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.dest1
+            Select Operator
+              expressions: _col0 (type: int), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order:
+                  Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-7
     Conditional Operator
@@ -87,6 +113,10 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest1
 
   Stage: Stage-3
     Map Reduce
diff --git ql/src/test/results/clientpositive/input38.q.out ql/src/test/results/clientpositive/input38.q.out
index 41a0dbd420..cd459078a2 100644
--- ql/src/test/results/clientpositive/input38.q.out
+++ ql/src/test/results/clientpositive/input38.q.out
@@ -58,6 +58,32 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.dest1
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order:
+                  Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-7
     Conditional Operator
@@ -81,6 +107,10 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.dest1
 
   Stage: Stage-3
     Map Reduce
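input3_limit below exercises the same pattern behind a LIMIT: the target table still gets its own compute_stats branch, Stats Work description, and merge stage, and the TopN Hash Memory Usage annotation also disappears from the affected reduce sinks in these plans. Deployments that prefer the old, cheaper plan shape can opt out per session; a hedged sketch:

    -- restores the pre-patch plans (basic stats only)
    SET hive.stats.column.autogather=false;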
diff --git ql/src/test/results/clientpositive/input3_limit.q.out ql/src/test/results/clientpositive/input3_limit.q.out
index d187ee2818..15e5dfd9fa 100644
--- ql/src/test/results/clientpositive/input3_limit.q.out
+++ ql/src/test/results/clientpositive/input3_limit.q.out
@@ -40,7 +40,8 @@ STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-2 depends on stages: Stage-1
   Stage-0 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-0, Stage-4
+  Stage-4 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-1
@@ -78,7 +79,6 @@ STAGE PLANS:
               key expressions: _col0 (type: string), _col1 (type: string)
               sort order: ++
               Statistics: Num rows: 1 Data size: 11603 Basic stats: COMPLETE Column stats: NONE
-              TopN Hash Memory Usage: 0.1
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string)
@@ -95,6 +95,21 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.t2
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 1 Data size: 11603 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -109,6 +124,32 @@ STAGE PLANS:
   Stage: Stage-3
     Stats Work
      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.t2
+
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: INSERT OVERWRITE TABLE T2 SELECT * FROM (SELECT * FROM T1 DISTRIBUTE BY key) T ORDER BY key, value LIMIT 20
 PREHOOK: type: QUERY
diff --git ql/src/test/results/clientpositive/input5.q.out ql/src/test/results/clientpositive/input5.q.out
index 75a827c5da..ceb28c0bc6 100644
--- ql/src/test/results/clientpositive/input5.q.out
+++ ql/src/test/results/clientpositive/input5.q.out
@@ -27,7 +27,8 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-1
@@ -66,6 +67,21 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.dest1
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 11 Data size: 3070 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -80,6 +96,32 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.dest1
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: FROM (
   FROM src_thrift
diff --git ql/src/test/results/clientpositive/input6.q.out ql/src/test/results/clientpositive/input6.q.out
index 35e746ae8c..f8183cdea6 100644
--- ql/src/test/results/clientpositive/input6.q.out
+++ ql/src/test/results/clientpositive/input6.q.out
@@ -46,6 +46,32 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.dest1
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order:
+                  Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-7
     Conditional Operator
@@ -69,6 +95,10 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.dest1
 
   Stage: Stage-3
     Map Reduce
diff --git ql/src/test/results/clientpositive/input7.q.out ql/src/test/results/clientpositive/input7.q.out
index 281f6d4a09..0b7279a664 100644
--- ql/src/test/results/clientpositive/input7.q.out
+++ ql/src/test/results/clientpositive/input7.q.out
@@ -43,6 +43,32 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.dest1
+            Select Operator
+              expressions: _col0 (type: double), _col1 (type: int)
+              outputColumnNames: c1, c2
+              Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order:
+                  Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-7
     Conditional Operator
@@ -66,6 +92,10 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
      Basic Stats Work:
+      Column Stats Desc:
+          Columns: c1, c2
+          Column Types: double, int
+          Table: default.dest1
 
   Stage: Stage-3
     Map Reduce
diff --git ql/src/test/results/clientpositive/input8.q.out ql/src/test/results/clientpositive/input8.q.out
index 97ccb0d63d..310195863b 100644
--- ql/src/test/results/clientpositive/input8.q.out
+++ ql/src/test/results/clientpositive/input8.q.out
@@ -43,6 +43,32 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.dest1
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: int), _col2 (type: double)
+              outputColumnNames: c1, c2, c3
+              Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order:
+                  Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-7
     Conditional Operator
@@ -66,6 +92,10 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
      Basic Stats Work:
+      Column Stats Desc:
+          Columns: c1, c2, c3
+          Column Types: string, int, double
+          Table: default.dest1
 
   Stage: Stage-3
     Map Reduce
diff --git ql/src/test/results/clientpositive/input9.q.out ql/src/test/results/clientpositive/input9.q.out
index 8df7fb760e..32ead4f837 100644
--- ql/src/test/results/clientpositive/input9.q.out
+++ ql/src/test/results/clientpositive/input9.q.out
@@ -46,6 +46,32 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.dest1
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: int)
+              outputColumnNames: value, key
+              Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(value, 'hll'), compute_stats(key, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order:
+                  Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-7
     Conditional Operator
@@ -69,6 +95,10 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
      Basic Stats Work:
+      Column Stats Desc:
+          Columns: value, key
+          Column Types: string, int
+          Table: default.dest1
 
   Stage: Stage-3
     Map Reduce
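The input_part1 and input_part2 files that follow are EXPLAIN EXTENDED outputs, so the same stats branch appears with its full serialization properties, and the Stats Work stage additionally reports Is Table Level Stats. Once such a plan has run, the gathered values land in the metastore; a hedged way to read one column back (column name illustrative):

    -- shows min/max, num_nulls, distinct_count, etc. once stats are gathered
    DESCRIBE FORMATTED default.dest1 key;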
diff --git ql/src/test/results/clientpositive/input_part1.q.out ql/src/test/results/clientpositive/input_part1.q.out
index 9a9b1921c9..31ce924066 100644
--- ql/src/test/results/clientpositive/input_part1.q.out
+++ ql/src/test/results/clientpositive/input_part1.q.out
@@ -72,6 +72,22 @@ STAGE PLANS:
               TotalFiles: 1
               GatherStats: true
               MultiFileSpray: false
+            Select Operator
+              expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string)
+              outputColumnNames: key, value, hr, ds
+              Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(hr, 'hll'), compute_stats(ds, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1 Data size: 1744 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  null sort order:
+                  sort order:
+                  Statistics: Num rows: 1 Data size: 1744 Basic stats: COMPLETE Column stats: NONE
+                  tag: -1
+                  value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+                  auto parallelism: false
       Path -> Alias:
 #### A masked pattern was here ####
       Path -> Partition:
@@ -125,6 +141,35 @@ STAGE PLANS:
             name: default.srcpart
       Truncated Path -> Alias:
         /srcpart/ds=2008-04-08/hr=12 [srcpart]
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  columns _col0,_col1,_col2,_col3
+                  columns.types struct:struct:struct:struct
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.escape.crlf true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
 
   Stage: Stage-7
     Conditional Operator
@@ -167,6 +212,11 @@ STAGE PLANS:
     Stats Work
       Basic Stats Work:
 #### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key, value, hr, ds
+          Column Types: int, string, string, string
+          Table: default.dest1
+          Is Table Level Stats: true
 
   Stage: Stage-3
     Map Reduce
diff --git ql/src/test/results/clientpositive/input_part10.q.out ql/src/test/results/clientpositive/input_part10.q.out
index e8ad15b914..ea4e71594e 100644
--- ql/src/test/results/clientpositive/input_part10.q.out
+++ ql/src/test/results/clientpositive/input_part10.q.out
@@ -29,7 +29,8 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-1
@@ -48,7 +49,6 @@ STAGE PLANS:
             Reduce Output Operator
               sort order:
               Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-              TopN Hash Memory Usage: 0.1
               value expressions: _col0 (type: int), _col1 (type: int)
       Reduce Operator Tree:
         Select Operator
@@ -70,6 +70,22 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.part_special
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string), '2008 04 08' (type: string), '10:11:12=455' (type: string)
+              outputColumnNames: a, b, ds, ts
+              Statistics: Num rows: 1 Data size: 558 Basic stats: COMPLETE Column stats: COMPLETE
+              Group By Operator
+                aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll')
+                keys: ds (type: string), ts (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1 Data size: 1070 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -87,6 +103,39 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
      Basic Stats Work:
+      Column Stats Desc:
+          Columns: a, b
+          Column Types: string, string
+          Table: default.part_special
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string), _col1 (type: string)
+              sort order: ++
+              Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+              Statistics: Num rows: 1 Data size: 1070 Basic stats: COMPLETE Column stats: COMPLETE
+              value expressions: _col2 (type: struct), _col3 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          keys: KEY._col0 (type: string), KEY._col1 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 1070 Basic stats: COMPLETE Column stats: COMPLETE
+          Select Operator
+            expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string)
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 1 Data size: 1070 Basic stats: COMPLETE Column stats: COMPLETE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 1070 Basic stats: COMPLETE Column stats: COMPLETE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: INSERT OVERWRITE TABLE part_special PARTITION(ds='2008 04 08', ts = '10:11:12=455') SELECT 1, 2 FROM src LIMIT 1
diff --git ql/src/test/results/clientpositive/input_part2.q.out ql/src/test/results/clientpositive/input_part2.q.out
index 6942b23984..c8ca522573 100644
--- ql/src/test/results/clientpositive/input_part2.q.out
+++ ql/src/test/results/clientpositive/input_part2.q.out
@@ -29,17 +29,13 @@ STAGE DEPENDENCIES:
   Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6
   Stage-5
   Stage-0 depends on stages: Stage-5, Stage-4, Stage-7
-  Stage-3 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-0, Stage-10
   Stage-4
   Stage-6
   Stage-7 depends on stages: Stage-6
-  Stage-14 depends on stages: Stage-2 , consists of Stage-11, Stage-10, Stage-12
-  Stage-11
-  Stage-1 depends on stages: Stage-11, Stage-10, Stage-13
-  Stage-9 depends on stages: Stage-1
-  Stage-10
-  Stage-12
-  Stage-13 depends on stages: Stage-12
+  Stage-1 depends on stages: Stage-2
Stage-10 + Stage-10 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -89,6 +85,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '12' (type: string), '2008-04-08' (type: string) + outputColumnNames: key, value, hr, ds + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(hr, 'hll'), compute_stats(ds, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1744 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1744 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Filter Operator isSamplingPred: false predicate: ((ds = '2008-04-09') and (key < 100)) (type: boolean) @@ -129,6 +141,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '12' (type: string), '2008-04-09' (type: string) + outputColumnNames: key, value, hr, ds + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(hr, 'hll'), compute_stats(ds, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1744 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -231,6 +270,35 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=12 [srcpart] /srcpart/ds=2008-04-09/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-8 Conditional Operator @@ -273,6 +341,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value, hr, ds + Column Types: int, string, string, string + Table: default.dest1 + Is Table Level Stats: true Stage: Stage-4 Map Reduce @@ -454,15 +527,6 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-14 - Conditional Operator - - Stage: Stage-11 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-1 Move Operator tables: @@ -495,186 +559,80 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value, hr, ds + Column Types: int, string, string, string + Table: default.dest2 + Is Table Level Stats: true Stage: Stage-10 Map Reduce Map Operator Tree: TableScan GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"ds":"true","hr":"true","key":"true","value":"true"}} - bucket_count -1 - column.name.delimiter , - columns key,value,hr,ds - columns.comments - columns.types int:string:string:string -#### A masked pattern was here #### - name default.dest2 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct dest2 { i32 key, string value, string hr, string ds} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1744 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: -ext-10005 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + base file name: -mr-10005 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"ds":"true","hr":"true","key":"true","value":"true"}} - bucket_count -1 column.name.delimiter , - columns key,value,hr,ds - columns.comments - columns.types int:string:string:string -#### A masked pattern was here #### - name default.dest2 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct dest2 { i32 key, string value, string hr, string ds} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + columns _col0,_col1,_col2,_col3 + columns.types struct,struct,struct,struct + escape.delim \ + 
serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"ds":"true","hr":"true","key":"true","value":"true"}} - bucket_count -1 column.name.delimiter , - columns key,value,hr,ds - columns.comments - columns.types int:string:string:string -#### A masked pattern was here #### - name default.dest2 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct dest2 { i32 key, string value, string hr, string ds} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - name: default.dest2 + columns _col0,_col1,_col2,_col3 + columns.types struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Truncated Path -> Alias: #### A masked pattern was here #### - - Stage: Stage-12 - Map Reduce - Map Operator Tree: - TableScan + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"ds":"true","hr":"true","key":"true","value":"true"}} - bucket_count -1 - column.name.delimiter , - columns key,value,hr,ds - columns.comments - columns.types int:string:string:string -#### A masked pattern was here #### - name default.dest2 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct dest2 { i32 key, string value, string hr, string ds} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - TotalFiles: 1 - 
GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10005 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"ds":"true","hr":"true","key":"true","value":"true"}} - bucket_count -1 - column.name.delimiter , - columns key,value,hr,ds - columns.comments - columns.types int:string:string:string -#### A masked pattern was here #### - name default.dest2 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct dest2 { i32 key, string value, string hr, string ds} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"ds":"true","hr":"true","key":"true","value":"true"}} - bucket_count -1 - column.name.delimiter , - columns key,value,hr,ds - columns.comments - columns.types int:string:string:string -#### A masked pattern was here #### - name default.dest2 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct dest2 { i32 key, string value, string hr, string ds} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - name: default.dest2 - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-13 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### + MultiFileSpray: false PREHOOK: query: FROM srcpart INSERT OVERWRITE TABLE dest1 SELECT srcpart.key, srcpart.value, srcpart.hr, srcpart.ds WHERE srcpart.key < 100 and srcpart.ds = '2008-04-08' and srcpart.hr = '12' diff --git ql/src/test/results/clientpositive/input_part5.q.out ql/src/test/results/clientpositive/input_part5.q.out index 7da77fb3cd..33ecd59709 100644 --- ql/src/test/results/clientpositive/input_part5.q.out +++ ql/src/test/results/clientpositive/input_part5.q.out @@ -46,6 +46,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, hr, ds + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(hr, 'hll'), compute_stats(ds, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), 
compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -69,6 +95,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value, hr, ds + Column Types: string, string, string, string + Table: default.tmptable Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/input_testsequencefile.q.out ql/src/test/results/clientpositive/input_testsequencefile.q.out index c248d03bfc..847d045d59 100644 --- ql/src/test/results/clientpositive/input_testsequencefile.q.out +++ ql/src/test/results/clientpositive/input_testsequencefile.q.out @@ -43,6 +43,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest4_sequencefile + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -66,6 +92,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest4_sequencefile Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/input_testxpath.q.out ql/src/test/results/clientpositive/input_testxpath.q.out index a68a5003fe..ddda55eeba 100644 --- ql/src/test/results/clientpositive/input_testxpath.q.out +++ ql/src/test/results/clientpositive/input_testxpath.q.out @@ -43,6 +43,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, mapvalue + Statistics: Num rows: 11 Data size: 3070 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(mapvalue, 'hll') + mode: hash + outputColumnNames: 
_col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -66,6 +92,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value, mapvalue + Column Types: int, string, string + Table: default.dest1 Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/input_testxpath2.q.out ql/src/test/results/clientpositive/input_testxpath2.q.out index ed45157141..5c40743fe7 100644 --- ql/src/test/results/clientpositive/input_testxpath2.q.out +++ ql/src/test/results/clientpositive/input_testxpath2.q.out @@ -46,6 +46,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: lint_size, lintstring_size, mstringstring_size + Statistics: Num rows: 11 Data size: 3070 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(lint_size, 'hll'), compute_stats(lintstring_size, 'hll'), compute_stats(mstringstring_size, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -69,6 +95,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: lint_size, lintstring_size, mstringstring_size + Column Types: int, int, int + Table: default.dest1 Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/insert1.q.out ql/src/test/results/clientpositive/insert1.q.out index aeb89eb9b7..b3a635b7ef 100644 --- ql/src/test/results/clientpositive/insert1.q.out +++ ql/src/test/results/clientpositive/insert1.q.out @@ -60,6 +60,32 @@ 
STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -83,6 +109,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert1 Stage: Stage-3 Map Reduce @@ -150,6 +180,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -173,6 +229,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert1 Stage: Stage-3 Map Reduce @@ -254,6 +314,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: x.insert1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator 
+ aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -277,6 +363,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: x.insert1 Stage: Stage-3 Map Reduce @@ -344,6 +434,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -367,6 +483,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert1 Stage: Stage-3 Map Reduce @@ -413,17 +533,13 @@ STAGE DEPENDENCIES: Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-10 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 - Stage-14 depends on stages: Stage-2 , consists of Stage-11, Stage-10, Stage-12 - Stage-11 - Stage-1 depends on stages: Stage-11, Stage-10, Stage-13 - Stage-9 depends on stages: Stage-1 - Stage-10 - Stage-12 - Stage-13 depends on stages: Stage-12 + Stage-1 depends on stages: Stage-2 + Stage-9 depends on stages: Stage-1, Stage-10 + Stage-10 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -447,6 +563,19 @@ STAGE PLANS: output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key < 20) and (key > 10)) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE @@ -462,6 +591,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: x.insert1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -485,6 +642,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert1 Stage: Stage-4 Map Reduce @@ -516,15 +677,6 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-14 - Conditional Operator - - Stage: Stage-11 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-1 Move Operator tables: @@ -538,36 +690,32 @@ STAGE PLANS: Stage: Stage-9 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: x.insert1 Stage: Stage-10 Map Reduce Map Operator Tree: TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: x.insert1 - - Stage: Stage-12 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - 
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: x.insert1 - - Stage: Stage-13 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: CREATE DATABASE db2 PREHOOK: type: CREATEDATABASE diff --git ql/src/test/results/clientpositive/insert1_overwrite_partitions.q.out ql/src/test/results/clientpositive/insert1_overwrite_partitions.q.out index a9378f81e5..b17bc11f89 100644 --- ql/src/test/results/clientpositive/insert1_overwrite_partitions.q.out +++ ql/src/test/results/clientpositive/insert1_overwrite_partitions.q.out @@ -41,7 +41,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -58,7 +59,6 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string) sort order: -- Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) @@ -75,6 +75,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.destintable + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2011-11-11' (type: string), '11' (type: string) + outputColumnNames: one, two, ds, hr + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(one, 'hll'), compute_stats(two, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -92,6 +108,39 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: one, two + Column Types: string, string + Table: default.destintable + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + 
Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE destinTable PARTITION (ds='2011-11-11', hr='11') if not exists SELECT one,two FROM sourceTable WHERE ds='2011-11-11' AND hr='11' order by one desc, two desc limit 5 @@ -175,7 +224,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -192,7 +242,6 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string) sort order: -- Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) @@ -209,6 +258,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.destintable + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: one, two + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(one, 'hll'), compute_stats(two, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -223,6 +287,32 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: one, two + Column Types: string, string + Table: default.destintable + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE destinTable SELECT one,two FROM sourceTable WHERE ds='2011-11-11' AND hr='11' order by one desc, two desc limit 5 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/insert2_overwrite_partitions.q.out ql/src/test/results/clientpositive/insert2_overwrite_partitions.q.out index c8bfdc6bb2..77913f0a5e 100644 --- ql/src/test/results/clientpositive/insert2_overwrite_partitions.q.out +++ ql/src/test/results/clientpositive/insert2_overwrite_partitions.q.out @@ -52,7 +52,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -69,7 +70,6 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string) sort order: -- Statistics: Num rows: 30 Data size: 6028 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) @@ -86,6 +86,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: db2.destintable + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2011-11-11' (type: string) + outputColumnNames: one, two, ds + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(one, 'hll'), compute_stats(two, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -102,6 +118,39 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: one, two + Column Types: string, string + Table: db2.destintable + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE db2.destinTable PARTITION 
(ds='2011-11-11') SELECT one,two FROM db1.sourceTable WHERE ds='2011-11-11' order by one desc, two desc limit 5 @@ -141,7 +190,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -158,7 +208,6 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string) sort order: -- Statistics: Num rows: 30 Data size: 6028 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) @@ -175,6 +224,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: db2.destintable + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2011-11-11' (type: string) + outputColumnNames: one, two, ds + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(one, 'hll'), compute_stats(two, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -191,6 +256,39 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: one, two + Column Types: string, string + Table: db2.destintable + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE db2.destinTable PARTITION (ds='2011-11-11') SELECT one,two FROM db1.sourceTable WHERE ds='2011-11-11' order by one desc, two desc limit 5 diff --git ql/src/test/results/clientpositive/insert_into1.q.out ql/src/test/results/clientpositive/insert_into1.q.out index 3d1438a037..023a6fb746 100644 --- ql/src/test/results/clientpositive/insert_into1.q.out +++ ql/src/test/results/clientpositive/insert_into1.q.out @@ -17,7 +17,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 
is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -34,7 +35,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Reduce Operator Tree: Select Operator @@ -56,6 +56,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -70,6 +85,32 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT INTO TABLE insert_into1 SELECT * from src ORDER BY key LIMIT 100 PREHOOK: type: QUERY @@ -126,7 +167,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -143,7 +185,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Reduce Operator Tree: Select Operator @@ -165,6 +206,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + 
Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -179,6 +235,32 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT INTO TABLE insert_into1 SELECT * FROM src ORDER BY key LIMIT 100 PREHOOK: type: QUERY @@ -235,7 +317,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -252,7 +335,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Reduce Operator Tree: Select Operator @@ -274,6 +356,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -288,6 +385,32 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data 
size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE insert_into1 SELECT * FROM src ORDER BY key LIMIT 10 PREHOOK: type: QUERY @@ -371,6 +494,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -394,6 +543,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into1 Stage: Stage-3 Map Reduce @@ -469,6 +622,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator 
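
Note for reviewers: every hunk in this region follows the same two-phase shape — a map-side Group By Operator in hash mode computes compute_stats(col, 'hll') partials alongside the FileSink, and a reduce-side Group By Operator in mergepartial mode folds those partials before the Stats Work stage publishes them. The sketch below is a minimal HiveQL approximation of the aggregation those appended operators perform, assuming the table and column names from the surrounding golden files (default.insert_into1 with columns key, value); the planner wires this pipeline in automatically, so the standalone query is illustrative only, not how the stats task is actually invoked:

  -- Roughly the aggregation the appended operators compute for
  -- default.insert_into1; compute_stats is the UDAF named in the plans
  -- above, and 'hll' selects the HyperLogLog NDV sketch.
  SELECT compute_stats(key, 'hll'),
         compute_stats(value, 'hll')
  FROM default.insert_into1;
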
@@ -492,6 +671,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into1 Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/insert_into2.q.out ql/src/test/results/clientpositive/insert_into2.q.out index 90b409cbb1..efffd5372f 100644 --- ql/src/test/results/clientpositive/insert_into2.q.out +++ ql/src/test/results/clientpositive/insert_into2.q.out @@ -21,7 +21,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -38,7 +39,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Reduce Operator Tree: Select Operator @@ -60,6 +60,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -76,6 +92,39 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT INTO TABLE insert_into2 PARTITION (ds='1') SELECT * FROM src order by key limit 100 PREHOOK: type: QUERY @@ -171,7 +220,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage 
Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -188,7 +238,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Reduce Operator Tree: Select Operator @@ -210,6 +259,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '2' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -226,6 +291,39 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE insert_into2 PARTITION (ds='2') SELECT * FROM src order by key LIMIT 100 @@ -290,7 +388,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -307,7 +406,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Reduce Operator Tree: Select Operator @@ -329,6 +427,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat 
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '2' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -345,6 +459,39 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 25 Data size: 250 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 25 Data size: 250 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 25 Data size: 250 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE insert_into2 PARTITION (ds='2') SELECT * FROM src order by key LIMIT 50 diff --git ql/src/test/results/clientpositive/insert_into3.q.out ql/src/test/results/clientpositive/insert_into3.q.out index 4bda7d9514..77fca861ab 100644 --- ql/src/test/results/clientpositive/insert_into3.q.out +++ ql/src/test/results/clientpositive/insert_into3.q.out @@ -31,10 +31,12 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1, Stage-4, Stage-7 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -51,7 +53,6 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 @@ -82,6 +83,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into3a + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -96,16 +112,49 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into3a Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into3b + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) @@ -126,6 +175,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into3b + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -137,9 +201,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into3b - Stage: Stage-5 - Stats Work - Basic Stats Work: + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: 
COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT INTO TABLE insert_into3a SELECT * ORDER BY key, value LIMIT 50 INSERT INTO TABLE insert_into3b SELECT * ORDER BY key, value LIMIT 100 @@ -192,10 +274,12 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1, Stage-4, Stage-7 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -214,7 +298,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string) Select Operator expressions: key (type: string), value (type: string) @@ -249,6 +332,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into3a + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -263,6 +361,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into3a Stage: Stage-4 Map Reduce @@ -270,8 +372,37 @@ STAGE PLANS: TableScan Reduce Output Operator sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into3b + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Select Operator @@ -293,6 +424,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into3b + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -304,9 +450,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into3b - Stage: Stage-5 - Stats Work - Basic Stats Work: + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT OVERWRITE TABLE insert_into3a SELECT * LIMIT 10 INSERT INTO TABLE insert_into3b SELECT * LIMIT 10 diff --git ql/src/test/results/clientpositive/insert_into4.q.out ql/src/test/results/clientpositive/insert_into4.q.out index 931ae3d7be..927d087849 100644 --- ql/src/test/results/clientpositive/insert_into4.q.out +++ ql/src/test/results/clientpositive/insert_into4.q.out @@ -29,7 +29,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -48,7 +49,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Select Operator @@ -70,6 +70,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into4a + 
Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -84,6 +99,32 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into4a + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT INTO TABLE insert_into4a SELECT * FROM src LIMIT 10 PREHOOK: type: QUERY @@ -115,7 +156,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -134,7 +176,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Select Operator @@ -156,6 +197,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into4a + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -170,6 +226,32 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into4a + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data 
size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT INTO TABLE insert_into4a SELECT * FROM src LIMIT 10 PREHOOK: type: QUERY @@ -227,6 +309,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into4b + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -250,6 +358,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into4b Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/insert_into5.q.out ql/src/test/results/clientpositive/insert_into5.q.out index 5e52e49263..66dd50276c 100644 --- ql/src/test/results/clientpositive/insert_into5.q.out +++ ql/src/test/results/clientpositive/insert_into5.q.out @@ -29,7 +29,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -48,7 +49,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 10 Data size: 910 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: int), _col1 (type: string) Reduce Operator Tree: Select Operator @@ -66,6 +66,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into5a + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 
Data size: 910 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -80,6 +95,32 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into5a + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT INTO TABLE insert_into5a SELECT 1, 'one' FROM src LIMIT 10 PREHOOK: type: QUERY @@ -137,6 +178,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into5a + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 50 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -160,6 +227,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into5a Stage: Stage-3 Map Reduce @@ -249,6 +320,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into5b + Select Operator + expressions: _col0 (type: 
int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 20 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 20 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 20 Data size: 100 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 10 Data size: 50 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 10 Data size: 50 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 50 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -274,6 +379,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into5b Stage: Stage-3 Map Reduce @@ -365,6 +474,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into5b + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 20 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 20 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 20 Data size: 100 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 10 Data size: 50 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 10 Data size: 50 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 50 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -390,6 +533,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into5b Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/insert_into6.q.out ql/src/test/results/clientpositive/insert_into6.q.out index 964e259771..c0fc7bd01b 100644 --- ql/src/test/results/clientpositive/insert_into6.q.out +++ ql/src/test/results/clientpositive/insert_into6.q.out @@ -31,7 +31,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -50,7 +51,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 150 Data size: 1500 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Select Operator @@ -72,6 +72,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into6a + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 150 Data size: 1500 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 150 Data size: 1500 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -88,6 +104,39 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into6a + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 150 Data size: 1500 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 75 Data size: 750 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 75 Data size: 750 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 75 Data size: 750 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT INTO TABLE insert_into6a PARTITION (ds='1') SELECT * FROM src LIMIT 150 PREHOOK: type: QUERY @@ 
-161,6 +210,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into6b + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 250 Data size: 2680 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2680 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2680 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 125 Data size: 1340 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 125 Data size: 1340 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 125 Data size: 1340 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -186,6 +269,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into6b Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/insertoverwrite_bucket.q.out ql/src/test/results/clientpositive/insertoverwrite_bucket.q.out index 4c020fd26e..ed97f83906 100644 --- ql/src/test/results/clientpositive/insertoverwrite_bucket.q.out +++ ql/src/test/results/clientpositive/insertoverwrite_bucket.q.out @@ -99,7 +99,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -130,6 +131,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.temp1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: change, num + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(change, 'hll'), compute_stats(num, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -144,6 +160,32 @@ STAGE PLANS: Stage: Stage-2 
Stats Work Basic Stats Work: + Column Stats Desc: + Columns: change, num + Column Types: string, string + Table: default.temp1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: CREATE TABLE temp2 ( @@ -178,7 +220,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -210,6 +253,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.temp2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: create_ts, change, num + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(create_ts, 'hll'), compute_stats(change, 'hll'), compute_stats(num, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -224,6 +282,32 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: create_ts, change, num + Column Types: string, string, string + Table: default.temp2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: select * from bucketoutput1 a join bucketoutput2 b on (a.data=b.data) PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/join14.q.out ql/src/test/results/clientpositive/join14.q.out index 10b4e1fd8b..cb456e23d7 
100644 --- ql/src/test/results/clientpositive/join14.q.out +++ ql/src/test/results/clientpositive/join14.q.out @@ -17,7 +17,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -75,6 +76,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: c1, c2 + Statistics: Num rows: 366 Data size: 3890 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -89,6 +105,32 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: c1, c2 + Column Types: int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and src.key > 100 INSERT OVERWRITE TABLE dest1 SELECT src.key, srcpart.value diff --git ql/src/test/results/clientpositive/join17.q.out ql/src/test/results/clientpositive/join17.q.out index 6c3e5fe40f..e6aac08a05 100644 --- ql/src/test/results/clientpositive/join17.q.out +++ ql/src/test/results/clientpositive/join17.q.out @@ -17,7 +17,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -162,6 +163,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: key1, value1, key2, value2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 'hll'), compute_stats(value1, 'hll'), compute_stats(key2, 'hll'), compute_stats(value2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE 
Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -195,6 +223,80 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key1, value1, key2, value2 + Column Types: int, string, int, string + Table: default.dest1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = 
src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.*, src2.* diff --git ql/src/test/results/clientpositive/join25.q.out ql/src/test/results/clientpositive/join25.q.out index 8ed420bbaa..30a62ce00e 100644 --- ql/src/test/results/clientpositive/join25.q.out +++ ql/src/test/results/clientpositive/join25.q.out @@ -17,13 +17,14 @@ SELECT /*+ MAPJOIN(x) */ x.key, x.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-0 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:x @@ -46,7 +47,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -79,6 +80,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -95,6 +111,32 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value, val2 + Column Types: int, string, string + Table: default.dest_j1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(x) */ x.key, x.value, y.value diff --git ql/src/test/results/clientpositive/join26.q.out ql/src/test/results/clientpositive/join26.q.out index 134aa523d8..870a735d8e 100644 --- ql/src/test/results/clientpositive/join26.q.out +++ ql/src/test/results/clientpositive/join26.q.out @@ -19,13 +19,14 @@ FROM src1 x JOIN src y ON (x.key = y.key) JOIN srcpart z ON (x.key = z.key and z.ds='2008-04-08' and z.hr=11) 
POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-5 depends on stages: Stage-6 - Stage-0 depends on stages: Stage-5 - Stage-2 depends on stages: Stage-0 + Stage-7 is a root stage + Stage-6 depends on stages: Stage-7 + Stage-0 depends on stages: Stage-6 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-6 STAGE PLANS: - Stage: Stage-6 + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:y @@ -74,7 +75,7 @@ STAGE PLANS: 2 _col0 (type: string) Position of Big Table: 2 - Stage: Stage-5 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -136,6 +137,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -316,6 +344,80 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: 
compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(x,y) */ x.key, z.value, y.value diff --git ql/src/test/results/clientpositive/join27.q.out ql/src/test/results/clientpositive/join27.q.out index 2e78d2ad22..a8f4ff4389 100644 --- ql/src/test/results/clientpositive/join27.q.out +++ ql/src/test/results/clientpositive/join27.q.out @@ -17,13 +17,14 @@ SELECT /*+ MAPJOIN(x) */ x.key, x.value, y.value FROM src1 x JOIN src y ON (x.value = y.value) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-0 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:x @@ -46,7 +47,7 @@ STAGE PLANS: 0 _col1 (type: string) 1 _col0 (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -79,6 +80,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -95,6 +111,32 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value, val2 + Column Types: int, string, string + Table: default.dest_j1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By 
Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(x) */ x.key, x.value, y.value diff --git ql/src/test/results/clientpositive/join28.q.out ql/src/test/results/clientpositive/join28.q.out index c4b122fcb6..3b29191e67 100644 --- ql/src/test/results/clientpositive/join28.q.out +++ ql/src/test/results/clientpositive/join28.q.out @@ -23,13 +23,14 @@ FROM JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-7 is a root stage - Stage-5 depends on stages: Stage-7 - Stage-0 depends on stages: Stage-5 - Stage-2 depends on stages: Stage-0 + Stage-8 is a root stage + Stage-6 depends on stages: Stage-8 + Stage-0 depends on stages: Stage-6 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-6 STAGE PLANS: - Stage: Stage-7 + Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:z @@ -70,7 +71,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-5 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -115,6 +116,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -131,6 +147,32 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest_j1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe 
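The fragment above completes the pattern that repeats through every plan in this diff: the INSERT's final Select Operator feeds a hash-mode Group By over compute_stats(col, 'hll'), a separate map-reduce stage merges the partials with mode: mergepartial, and the Stats Work stage gains a Column Stats Desc naming the columns, their types, and the target table. A minimal HiveQL sketch of the equivalent manual step, assuming the default.dest_j1 table from the join28 plan above (illustrative only, not part of the recorded test output):

    -- Gathers the same per-column statistics (key, value) that the
    -- auto-generated follow-up stage computes after the INSERT OVERWRITE:
    ANALYZE TABLE default.dest_j1 COMPUTE STATISTICS FOR COLUMNS;

    -- The underlying aggregation can also be invoked directly; 'hll'
    -- selects the HyperLogLog estimator for the distinct-value count:
    SELECT compute_stats(key, 'hll'), compute_stats(value, 'hll')
    FROM default.dest_j1;
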
PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT subq.key1, z.value diff --git ql/src/test/results/clientpositive/join29.q.out ql/src/test/results/clientpositive/join29.q.out index d394eda3d4..02521dda72 100644 --- ql/src/test/results/clientpositive/join29.q.out +++ ql/src/test/results/clientpositive/join29.q.out @@ -20,15 +20,16 @@ FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-7 depends on stages: Stage-1, Stage-4 , consists of Stage-8, Stage-9, Stage-2 - Stage-8 has a backup stage: Stage-2 - Stage-5 depends on stages: Stage-8 - Stage-0 depends on stages: Stage-2, Stage-5, Stage-6 - Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-1, Stage-5 , consists of Stage-9, Stage-10, Stage-2 Stage-9 has a backup stage: Stage-2 Stage-6 depends on stages: Stage-9 + Stage-0 depends on stages: Stage-2, Stage-6, Stage-7 + Stage-3 depends on stages: Stage-0, Stage-4 + Stage-4 depends on stages: Stage-2, Stage-6, Stage-7 + Stage-10 has a backup stage: Stage-2 + Stage-7 depends on stages: Stage-10 Stage-2 - Stage-4 is a root stage + Stage-5 is a root stage STAGE PLANS: Stage: Stage-1 @@ -66,10 +67,10 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-7 + Stage: Stage-8 Conditional Operator - Stage: Stage-8 + Stage: Stage-9 Map Reduce Local Work Alias -> Map Local Tables: $INTNAME1 @@ -83,7 +84,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-5 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -107,6 +108,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, cnt1, cnt2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt1, 'hll'), compute_stats(cnt2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -123,8 +139,34 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, cnt1, cnt2 + Column Types: string, int, int + Table: default.dest_j1 - Stage: Stage-9 + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-10 Map Reduce Local Work Alias -> Map Local Tables: $INTNAME @@ -138,7 +180,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-6 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -162,6 +204,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, cnt1, cnt2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt1, 'hll'), compute_stats(cnt2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -203,8 +260,23 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, cnt1, cnt2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt1, 'hll'), compute_stats(cnt2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan diff --git ql/src/test/results/clientpositive/join3.q.out ql/src/test/results/clientpositive/join3.q.out index 8c4cfe2c37..87fb0574b0 100644 --- ql/src/test/results/clientpositive/join3.q.out +++ ql/src/test/results/clientpositive/join3.q.out @@ -17,7 +17,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -92,6 +93,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -106,6 +122,32 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key = src3.key) INSERT OVERWRITE TABLE dest1 SELECT src1.key, src3.value diff --git ql/src/test/results/clientpositive/join30.q.out ql/src/test/results/clientpositive/join30.q.out index cdd54458db..d29ef1b2a6 100644 --- ql/src/test/results/clientpositive/join30.q.out +++ ql/src/test/results/clientpositive/join30.q.out @@ -15,13 +15,14 @@ INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(x) */ x.key, count(1) FROM src1 x JOIN src y ON (x.key = y.key) group by x.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-2 depends on stages: Stage-6 + Stage-7 is a root stage + Stage-2 depends on stages: Stage-7 Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4 + Stage-4 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-6 + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:x @@ -98,6 +99,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -112,6 +128,32 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest_j1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By 
Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(x) */ x.key, count(1) FROM src1 x JOIN src y ON (x.key = y.key) group by x.key diff --git ql/src/test/results/clientpositive/join31.q.out ql/src/test/results/clientpositive/join31.q.out index 7b0cbdc64e..abcb32fa71 100644 --- ql/src/test/results/clientpositive/join31.q.out +++ ql/src/test/results/clientpositive/join31.q.out @@ -22,10 +22,11 @@ group by subq1.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-7 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-7 + Stage-8 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-8 Stage-0 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-0, Stage-5 + Stage-5 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -60,7 +61,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-7 + Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:y @@ -133,6 +134,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -147,6 +163,32 @@ STAGE PLANS: Stage: Stage-4 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, cnt + Column Types: string, int + Table: default.dest_j1 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT 
OVERWRITE TABLE dest_j1 SELECT subq1.key, count(1) as cnt diff --git ql/src/test/results/clientpositive/join32.q.out ql/src/test/results/clientpositive/join32.q.out index 30a5ba9195..47c88b82c3 100644 --- ql/src/test/results/clientpositive/join32.q.out +++ ql/src/test/results/clientpositive/join32.q.out @@ -19,13 +19,14 @@ FROM src1 x JOIN src y ON (x.key = y.key) JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-8 is a root stage - Stage-6 depends on stages: Stage-8 - Stage-0 depends on stages: Stage-6 - Stage-3 depends on stages: Stage-0 + Stage-9 is a root stage + Stage-7 depends on stages: Stage-9 + Stage-0 depends on stages: Stage-7 + Stage-3 depends on stages: Stage-0, Stage-4 + Stage-4 depends on stages: Stage-7 STAGE PLANS: - Stage: Stage-8 + Stage: Stage-9 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:z @@ -120,7 +121,7 @@ STAGE PLANS: 1 _col0 (type: string) Position of Big Table: 0 - Stage: Stage-6 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -189,6 +190,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -369,6 +397,80 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j1 + Is Table Level Stats: true + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10003 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT x.key, z.value, y.value diff --git ql/src/test/results/clientpositive/join33.q.out ql/src/test/results/clientpositive/join33.q.out index 30a5ba9195..47c88b82c3 100644 --- ql/src/test/results/clientpositive/join33.q.out +++ ql/src/test/results/clientpositive/join33.q.out @@ -19,13 +19,14 @@ FROM src1 x JOIN src y ON (x.key = y.key) JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-8 is a root stage - Stage-6 depends on stages: Stage-8 - Stage-0 depends on stages: Stage-6 - Stage-3 depends on stages: Stage-0 + Stage-9 is a root stage + Stage-7 depends on stages: Stage-9 + Stage-0 depends on stages: Stage-7 + Stage-3 depends on stages: Stage-0, Stage-4 + Stage-4 depends on stages: Stage-7 STAGE PLANS: - Stage: Stage-8 + Stage: Stage-9 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:z @@ -120,7 +121,7 @@ STAGE PLANS: 1 _col0 (type: string) Position of Big Table: 0 - Stage: Stage-6 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -189,6 +190,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + 
serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -369,6 +397,80 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j1 + Is Table Level Stats: true + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10003 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT x.key, z.value, y.value diff --git ql/src/test/results/clientpositive/join34.q.out ql/src/test/results/clientpositive/join34.q.out index 072a20b62d..7675ad9870 100644 --- ql/src/test/results/clientpositive/join34.q.out +++ ql/src/test/results/clientpositive/join34.q.out @@ -27,13 +27,14 @@ FROM JOIN src1 x ON (x.key = subq1.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-5 depends on stages: Stage-6 - Stage-0 depends on stages: Stage-5 - Stage-2 depends on stages: Stage-0 + Stage-7 is 
a root stage + Stage-6 depends on stages: Stage-7 + Stage-0 depends on stages: Stage-6 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-6 STAGE PLANS: - Stage: Stage-6 + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:x @@ -59,7 +60,7 @@ STAGE PLANS: 1 _col0 (type: string) Position of Big Table: 0 - Stage: Stage-5 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -121,6 +122,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false TableScan alias: x1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -180,6 +208,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -312,6 +367,80 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + 
Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT x.key, x.value, subq1.value diff --git ql/src/test/results/clientpositive/join35.q.out ql/src/test/results/clientpositive/join35.q.out index f8fd4387a9..4590ef946d 100644 --- ql/src/test/results/clientpositive/join35.q.out +++ ql/src/test/results/clientpositive/join35.q.out @@ -28,11 +28,12 @@ JOIN src1 x ON (x.key = subq1.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-7 depends on stages: Stage-1, Stage-4 - Stage-6 depends on stages: Stage-7 - Stage-0 depends on stages: Stage-6 - Stage-3 depends on stages: Stage-0 - Stage-4 is a root stage + Stage-8 depends on stages: Stage-1, Stage-5 + Stage-7 depends on stages: Stage-8 + Stage-0 depends on stages: Stage-7 + Stage-3 depends on stages: Stage-0, Stage-4 + Stage-4 depends on stages: Stage-7 + Stage-5 is a root stage STAGE PLANS: Stage: Stage-1 @@ -139,7 +140,7 @@ STAGE PLANS: GatherStats: false MultiFileSpray: false - Stage: Stage-7 + Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:x @@ -165,7 +166,7 @@ STAGE PLANS: 1 _col0 (type: string) Position of Big Table: 0 - Stage: Stage-6 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -217,6 +218,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, value, val2 + Statistics: Num rows: 182 Data size: 
1938 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false TableScan GatherStats: false Union @@ -266,6 +294,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, value, val2 + Statistics: Num rows: 182 Data size: 1938 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -295,7 +350,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe #### A masked pattern was here #### Partition - base file name: -mr-10003 + base file name: -mr-10004 input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: @@ -396,11 +451,85 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, int + Table: default.dest_j1 + Is Table Level Stats: true Stage: Stage-4 Map Reduce Map Operator Tree: TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10003 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan alias: x1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false diff --git ql/src/test/results/clientpositive/join36.q.out ql/src/test/results/clientpositive/join36.q.out index 0fd4d918e6..40dbabb1d0 100644 --- ql/src/test/results/clientpositive/join36.q.out +++ ql/src/test/results/clientpositive/join36.q.out @@ -57,13 +57,14 @@ SELECT /*+ MAPJOIN(x) */ x.key, x.cnt, y.cnt FROM tmp1 x JOIN tmp2 y ON (x.key = y.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-0 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:x @@ -86,7 +87,7 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -119,6 +120,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, value, val2 + Statistics: Num rows: 339 Data size: 1630 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + 
table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -135,6 +151,32 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value, val2 + Column Types: int, int, int + Table: default.dest_j1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(x) */ x.key, x.cnt, y.cnt diff --git ql/src/test/results/clientpositive/join37.q.out ql/src/test/results/clientpositive/join37.q.out index c8bef838f4..4effb31d9a 100644 --- ql/src/test/results/clientpositive/join37.q.out +++ ql/src/test/results/clientpositive/join37.q.out @@ -17,13 +17,14 @@ SELECT /*+ MAPJOIN(X) */ x.key, x.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-0 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:x @@ -46,7 +47,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -79,6 +80,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -95,6 +111,32 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value, val2 + Column Types: int, string, string + Table: default.dest_j1 + + Stage: Stage-3 + 
Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(X) */ x.key, x.value, y.value diff --git ql/src/test/results/clientpositive/join39.q.out ql/src/test/results/clientpositive/join39.q.out index 93ebdb85c7..80272a4caf 100644 --- ql/src/test/results/clientpositive/join39.q.out +++ ql/src/test/results/clientpositive/join39.q.out @@ -17,13 +17,14 @@ SELECT /*+ MAPJOIN(y) */ x.key, x.value, y.key, y.value FROM src x left outer JOIN (select * from src where key <= 100) y ON (x.key = y.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-0 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:src @@ -46,7 +47,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -72,6 +73,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, key1, val2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(key1, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -88,6 +104,32 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value, key1, val2 + Column Types: string, string, string, string + Table: default.dest_j1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By 
Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(y) */ x.key, x.value, y.key, y.value diff --git ql/src/test/results/clientpositive/join4.q.out ql/src/test/results/clientpositive/join4.q.out index d4e83c444f..95a4dbfac6 100644 --- ql/src/test/results/clientpositive/join4.q.out +++ ql/src/test/results/clientpositive/join4.q.out @@ -39,7 +39,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -98,6 +99,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -112,6 +128,32 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: c1, c2, c3, c4 + Column Types: int, string, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM ( FROM diff --git ql/src/test/results/clientpositive/join5.q.out ql/src/test/results/clientpositive/join5.q.out index 8cfe40271a..7ce3b34a2d 100644 --- 
ql/src/test/results/clientpositive/join5.q.out +++ ql/src/test/results/clientpositive/join5.q.out @@ -39,7 +39,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -98,6 +99,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -112,6 +128,32 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: c1, c2, c3, c4 + Column Types: int, string, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM ( FROM diff --git ql/src/test/results/clientpositive/join6.q.out ql/src/test/results/clientpositive/join6.q.out index 8d6578afc6..989ae2a512 100644 --- ql/src/test/results/clientpositive/join6.q.out +++ ql/src/test/results/clientpositive/join6.q.out @@ -39,7 +39,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -98,6 +99,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 
'hll'), compute_stats(c4, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -112,6 +128,32 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: c1, c2, c3, c4 + Column Types: int, string, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM ( FROM diff --git ql/src/test/results/clientpositive/join7.q.out ql/src/test/results/clientpositive/join7.q.out index f1af54a7b4..f34df53d83 100644 --- ql/src/test/results/clientpositive/join7.q.out +++ ql/src/test/results/clientpositive/join7.q.out @@ -49,7 +49,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -126,6 +127,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: int), _col5 (type: string) + outputColumnNames: c1, c2, c3, c4, c5, c6 + Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 2592 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -140,6 +156,32 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6 + Column Types: int, string, int, string, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + 
sort order: + Statistics: Num rows: 1 Data size: 2592 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM ( FROM diff --git ql/src/test/results/clientpositive/join8.q.out ql/src/test/results/clientpositive/join8.q.out index ce5a5d43b4..bfe4fd8411 100644 --- ql/src/test/results/clientpositive/join8.q.out +++ ql/src/test/results/clientpositive/join8.q.out @@ -39,7 +39,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -101,6 +102,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 30 Data size: 321 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -115,6 +131,32 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: c1, c2, c3, c4 + Column Types: int, string, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM ( FROM diff --git ql/src/test/results/clientpositive/join9.q.out ql/src/test/results/clientpositive/join9.q.out index 58f8d7dfbe..6db5185b20 100644 --- ql/src/test/results/clientpositive/join9.q.out +++ ql/src/test/results/clientpositive/join9.q.out @@ -17,7 +17,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -210,6 +211,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -243,6 +271,80 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 
Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value where src1.ds = '2008-04-08' and src1.hr = '12' diff --git ql/src/test/results/clientpositive/join_map_ppr.q.out ql/src/test/results/clientpositive/join_map_ppr.q.out index 6eb9889f90..c5acb9b351 100644 --- ql/src/test/results/clientpositive/join_map_ppr.q.out +++ ql/src/test/results/clientpositive/join_map_ppr.q.out @@ -131,6 +131,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -186,6 +202,35 @@ STAGE PLANS: name: default.srcpart Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [z] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-9 Conditional Operator @@ -228,6 +273,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value, 
val2 + Column Types: string, string, string + Table: default.dest_j1 + Is Table Level Stats: true Stage: Stage-5 Map Reduce @@ -689,7 +739,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value,val2 @@ -710,6 +760,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -765,6 +831,35 @@ STAGE PLANS: name: default.srcpart Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [z] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-9 Conditional Operator @@ -784,7 +879,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value,val2 @@ -807,6 +902,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j1 + Is Table Level Stats: true Stage: Stage-5 Map Reduce @@ -822,7 +922,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value,val2 @@ -852,7 +952,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value,val2 @@ -873,7 +973,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value,val2 @@ -909,7 +1009,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value,val2 @@ -939,7 +1039,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value,val2 @@ -960,7 +1060,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value,val2 diff --git ql/src/test/results/clientpositive/lb_fs_stats.q.out ql/src/test/results/clientpositive/lb_fs_stats.q.out index 330908982a..65953640aa 100644 --- ql/src/test/results/clientpositive/lb_fs_stats.q.out +++ ql/src/test/results/clientpositive/lb_fs_stats.q.out @@ -44,7 +44,7 @@ Database: default Table: test_tab #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 500 rawDataSize 4812 diff --git ql/src/test/results/clientpositive/limit_pushdown_negative.q.out ql/src/test/results/clientpositive/limit_pushdown_negative.q.out index 7d0abfdf97..7bcde29a90 100644 --- ql/src/test/results/clientpositive/limit_pushdown_negative.q.out +++ ql/src/test/results/clientpositive/limit_pushdown_negative.q.out @@ -226,10 +226,12 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1, Stage-4, Stage-7 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -269,6 +271,21 @@ STAGE PLANS: output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, c1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string) @@ -298,6 +315,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, c1 + Column Types: string, int + Table: default.dest_2 Stage: Stage-4 Map Reduce @@ -305,8 +326,37 @@ STAGE PLANS: TableScan Reduce Output Operator sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, c1 + Column Types: string, int + Table: default.dest_3 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.3 value expressions: _col0 (type: string), _col1 (type: double) Reduce Operator Tree: Select Operator @@ -328,6 +378,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_3 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, c1 + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -339,7 +404,25 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_3 - Stage: Stage-5 - Stats Work - Basic Stats Work: + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE 
Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe diff --git ql/src/test/results/clientpositive/lineage1.q.out ql/src/test/results/clientpositive/lineage1.q.out index 7d6af3e1df..beee1294d5 100644 --- ql/src/test/results/clientpositive/lineage1.q.out +++ ql/src/test/results/clientpositive/lineage1.q.out @@ -111,6 +111,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_l1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan Union Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE @@ -126,6 +139,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_l1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -149,6 +188,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest_l1 Stage: Stage-4 Map Reduce diff --git ql/src/test/results/clientpositive/list_bucket_dml_1.q.out ql/src/test/results/clientpositive/list_bucket_dml_1.q.out index 
2eca946673..6901a8e85b 100644 --- ql/src/test/results/clientpositive/list_bucket_dml_1.q.out +++ ql/src/test/results/clientpositive/list_bucket_dml_1.q.out @@ -65,6 +65,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -167,6 +186,40 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [srcpart] /srcpart/ds=2008-04-08/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -200,6 +253,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_dynamic_part + Is Table Level Stats: false PREHOOK: query: insert overwrite table list_bucketing_dynamic_part partition (ds='2008-04-08', hr) select key, value, hr from srcpart where ds='2008-04-08' PREHOOK: type: QUERY @@ -239,7 +297,7 @@ Database: default Table: list_bucketing_dynamic_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 
500 rawDataSize 5312 @@ -282,7 +340,7 @@ Database: default Table: list_bucketing_dynamic_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 500 rawDataSize 5312 @@ -354,7 +412,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git ql/src/test/results/clientpositive/list_bucket_dml_11.q.out ql/src/test/results/clientpositive/list_bucket_dml_11.q.out index 6b347454ac..872dd14a36 100644 --- ql/src/test/results/clientpositive/list_bucket_dml_11.q.out +++ ql/src/test/results/clientpositive/list_bucket_dml_11.q.out @@ -69,6 +69,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -120,6 +139,40 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [src] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -153,6 +206,11 @@ STAGE PLANS: Stats 
Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_static_part + Is Table Level Stats: false PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') select key, value from src @@ -194,7 +252,7 @@ Database: default Table: list_bucketing_static_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 4 numRows 500 rawDataSize 4812 @@ -237,7 +295,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git ql/src/test/results/clientpositive/list_bucket_dml_12.q.out ql/src/test/results/clientpositive/list_bucket_dml_12.q.out index a0bc3e99ce..09d4718a14 100644 --- ql/src/test/results/clientpositive/list_bucket_dml_12.q.out +++ ql/src/test/results/clientpositive/list_bucket_dml_12.q.out @@ -69,6 +69,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: col1, col2, col3, col4, col5, ds, hr + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll'), compute_stats(col3, 'hll'), compute_stats(col4, 'hll'), compute_stats(col5, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -120,6 +139,40 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [src] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + 
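In the partitioned list-bucketing cases the new aggregation is keyed by the partition columns (keys: ds (type: string), hr (type: string)), so one stats row is produced per partition and the Stats Work records Is Table Level Stats: false. The manual equivalent of what autogather now performs implicitly would be roughly the following sketch, with the partition spec taken from this test:

    ANALYZE TABLE list_bucketing_mul_col PARTITION (ds='2008-04-08', hr='11')
    COMPUTE STATISTICS FOR COLUMNS;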
NumFilesPerFileSink: 1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 + columns.types struct:struct:struct:struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -153,6 +206,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: col1, col2, col3, col4, col5 + Column Types: string, string, string, string, string + Table: default.list_bucketing_mul_col + Is Table Level Stats: false PREHOOK: query: insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '11') select 1, key, 1, value, 1 from src @@ -200,7 +258,7 @@ Database: default Table: list_bucketing_mul_col #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\",\"col4\":\"true\",\"col5\":\"true\"}} numFiles 4 numRows 500 rawDataSize 6312 @@ -245,7 +303,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"col1":"true","col2":"true","col3":"true","col4":"true","col5":"true"}} bucket_count -1 column.name.delimiter , columns col1,col2,col3,col4,col5 @@ -337,7 +395,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"col1":"true","col2":"true","col3":"true","col4":"true","col5":"true"}} bucket_count -1 column.name.delimiter , columns col1,col2,col3,col4,col5 diff --git ql/src/test/results/clientpositive/list_bucket_dml_13.q.out ql/src/test/results/clientpositive/list_bucket_dml_13.q.out index df8717b0b3..becdf67aa6 100644 --- ql/src/test/results/clientpositive/list_bucket_dml_13.q.out +++ ql/src/test/results/clientpositive/list_bucket_dml_13.q.out @@ -69,6 +69,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), '2008-04-08' (type: string), '2013-01-23+18:00:99' (type: string) + outputColumnNames: col1, col2, col3, col4, col5, ds, hr + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll'), compute_stats(col3, 'hll'), compute_stats(col4, 'hll'), compute_stats(col5, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: 
Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -120,6 +139,40 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [src] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 + columns.types struct:struct:struct:struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -153,6 +206,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: col1, col2, col3, col4, col5 + Column Types: string, string, string, string, string + Table: default.list_bucketing_mul_col + Is Table Level Stats: false PREHOOK: query: insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '2013-01-23+18:00:99') select 1, key, 1, value, 1 from src @@ -200,7 +258,7 @@ Database: default Table: list_bucketing_mul_col #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\",\"col4\":\"true\",\"col5\":\"true\"}} numFiles 4 numRows 500 rawDataSize 6312 @@ -245,7 +303,7 @@ STAGE PLANS: ds 2008-04-08 hr 2013-01-23+18:00:99 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"col1":"true","col2":"true","col3":"true","col4":"true","col5":"true"}} bucket_count -1 column.name.delimiter , columns col1,col2,col3,col4,col5 diff --git ql/src/test/results/clientpositive/list_bucket_dml_14.q.out ql/src/test/results/clientpositive/list_bucket_dml_14.q.out index 6b48f3dd83..b6f65409aa 100644 --- ql/src/test/results/clientpositive/list_bucket_dml_14.q.out +++ ql/src/test/results/clientpositive/list_bucket_dml_14.q.out @@ -65,6 +65,22 @@ STAGE PLANS: 
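The list_bucket_dml_14 hunk below is the unpartitioned counterpart: no grouping keys, a single merged stats row, Is Table Level Stats: true, and the table-level COLUMN_STATS_ACCURATE parameter gains a COLUMN_STATS map for key and value. One way to verify the gathered statistics after such an insert (a sketch; the table and column names come from this test):

    DESCRIBE FORMATTED list_bucketing key;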
TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -116,6 +132,35 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [src] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -149,6 +194,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing + Is Table Level Stats: true PREHOOK: query: insert overwrite table list_bucketing select * from src PREHOOK: type: QUERY @@ -177,7 +227,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 500 rawDataSize 5312 @@ -283,7 +333,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -304,7 +354,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git ql/src/test/results/clientpositive/list_bucket_dml_2.q.out ql/src/test/results/clientpositive/list_bucket_dml_2.q.out index 512aeabb8c..0280c0253d 100644 --- 
ql/src/test/results/clientpositive/list_bucket_dml_2.q.out +++ ql/src/test/results/clientpositive/list_bucket_dml_2.q.out @@ -69,6 +69,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -171,6 +190,40 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [srcpart] /srcpart/ds=2008-04-08/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -204,6 +257,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_static_part + Is Table Level Stats: false PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') select key, value from srcpart where ds = '2008-04-08' @@ -249,7 +307,7 @@ Database: default Table: list_bucketing_static_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 6 numRows 1000 rawDataSize 9624 @@ -310,7 +368,7 @@ STAGE 
PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git ql/src/test/results/clientpositive/list_bucket_dml_3.q.out ql/src/test/results/clientpositive/list_bucket_dml_3.q.out index e86bf0a829..b556890eb0 100644 --- ql/src/test/results/clientpositive/list_bucket_dml_3.q.out +++ ql/src/test/results/clientpositive/list_bucket_dml_3.q.out @@ -59,6 +59,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -161,6 +180,40 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [srcpart] /srcpart/ds=2008-04-08/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -194,6 +247,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_static_part + Is Table Level Stats: false PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds='2008-04-08', hr='11') select key, 
value from srcpart where ds='2008-04-08' PREHOOK: type: QUERY @@ -230,7 +288,7 @@ Database: default Table: list_bucketing_static_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 4 numRows 1000 rawDataSize 10624 @@ -302,7 +360,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git ql/src/test/results/clientpositive/list_bucket_dml_4.q.out ql/src/test/results/clientpositive/list_bucket_dml_4.q.out index 7f90ac5928..7fc96c1a7e 100644 --- ql/src/test/results/clientpositive/list_bucket_dml_4.q.out +++ ql/src/test/results/clientpositive/list_bucket_dml_4.q.out @@ -69,6 +69,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -171,6 +190,40 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [srcpart] /srcpart/ds=2008-04-08/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false 
+ MultiFileSpray: false Stage: Stage-0 Move Operator @@ -204,6 +257,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_static_part + Is Table Level Stats: false PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') select key, value from srcpart where ds = '2008-04-08' @@ -249,7 +307,7 @@ Database: default Table: list_bucketing_static_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 6 numRows 1000 rawDataSize 9624 @@ -330,6 +388,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -432,6 +509,40 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [srcpart] /srcpart/ds=2008-04-08/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -473,6 +584,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was 
here #### + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_static_part + Is Table Level Stats: false Stage: Stage-3 Merge File Operator @@ -622,7 +738,7 @@ Database: default Table: list_bucketing_static_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 4 numRows 1000 rawDataSize 9624 @@ -683,7 +799,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git ql/src/test/results/clientpositive/list_bucket_dml_5.q.out ql/src/test/results/clientpositive/list_bucket_dml_5.q.out index 64fdacb54c..f70ffa55f7 100644 --- ql/src/test/results/clientpositive/list_bucket_dml_5.q.out +++ ql/src/test/results/clientpositive/list_bucket_dml_5.q.out @@ -65,6 +65,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -167,6 +186,40 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [srcpart] /srcpart/ds=2008-04-08/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -200,6 +253,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_dynamic_part + Is Table Level Stats: false PREHOOK: query: insert overwrite table list_bucketing_dynamic_part partition (ds='2008-04-08', hr) select key, value, hr from srcpart where ds='2008-04-08' PREHOOK: type: QUERY @@ -239,7 +297,7 @@ Database: default Table: list_bucketing_dynamic_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 3 numRows 500 rawDataSize 5312 @@ -282,7 +340,7 @@ Database: default Table: list_bucketing_dynamic_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 3 numRows 500 rawDataSize 5312 @@ -359,7 +417,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -405,7 +463,7 @@ STAGE PLANS: ds 2008-04-08 hr 12 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git ql/src/test/results/clientpositive/list_bucket_dml_6.q.out ql/src/test/results/clientpositive/list_bucket_dml_6.q.out index 32dac42591..5b625f0d41 100644 --- ql/src/test/results/clientpositive/list_bucket_dml_6.q.out +++ ql/src/test/results/clientpositive/list_bucket_dml_6.q.out @@ -69,6 +69,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -171,6 +190,40 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [srcpart] /srcpart/ds=2008-04-08/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: 
_col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -204,6 +257,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_dynamic_part + Is Table Level Stats: false PREHOOK: query: insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr) select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08' @@ -253,7 +311,7 @@ Database: default Table: list_bucketing_dynamic_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 16 rawDataSize 136 @@ -294,7 +352,7 @@ Database: default Table: list_bucketing_dynamic_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 6 numRows 984 rawDataSize 9488 @@ -376,6 +434,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -478,6 +555,40 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [srcpart] /srcpart/ds=2008-04-08/hr=12 [srcpart] + Needs Tagging: false + Reduce 
Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -520,6 +631,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_dynamic_part + Is Table Level Stats: false Stage: Stage-3 Merge File Operator @@ -677,7 +793,7 @@ Database: default Table: list_bucketing_dynamic_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 16 rawDataSize 136 @@ -718,7 +834,7 @@ Database: default Table: list_bucketing_dynamic_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 4 numRows 984 rawDataSize 9488 @@ -779,7 +895,7 @@ STAGE PLANS: ds 2008-04-08 hr a1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -825,7 +941,7 @@ STAGE PLANS: ds 2008-04-08 hr b1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git ql/src/test/results/clientpositive/list_bucket_dml_7.q.out ql/src/test/results/clientpositive/list_bucket_dml_7.q.out index 7c11d3ff8f..4160ad60b7 100644 --- ql/src/test/results/clientpositive/list_bucket_dml_7.q.out +++ ql/src/test/results/clientpositive/list_bucket_dml_7.q.out @@ -69,6 +69,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: 
compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -171,6 +190,40 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [srcpart] /srcpart/ds=2008-04-08/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -204,6 +257,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_dynamic_part + Is Table Level Stats: false PREHOOK: query: insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr) select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08' @@ -253,7 +311,7 @@ Database: default Table: list_bucketing_dynamic_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 16 rawDataSize 136 @@ -294,7 +352,7 @@ Database: default Table: list_bucketing_dynamic_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 4 numRows 984 rawDataSize 9488 @@ -376,6 +434,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 
'2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -478,6 +555,40 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [srcpart] /srcpart/ds=2008-04-08/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -520,6 +631,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_dynamic_part + Is Table Level Stats: false Stage: Stage-3 Merge File Operator @@ -677,7 +793,7 @@ Database: default Table: list_bucketing_dynamic_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 16 rawDataSize 136 @@ -718,7 +834,7 @@ Database: default Table: list_bucketing_dynamic_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 3 numRows 984 rawDataSize 9488 @@ -779,7 +895,7 @@ STAGE PLANS: ds 2008-04-08 hr a1 properties: - COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -825,7 +941,7 @@ STAGE PLANS: ds 2008-04-08 hr b1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git ql/src/test/results/clientpositive/list_bucket_dml_8.q.out ql/src/test/results/clientpositive/list_bucket_dml_8.q.out index d45be4e9e7..7e9aad484d 100644 --- ql/src/test/results/clientpositive/list_bucket_dml_8.q.out +++ ql/src/test/results/clientpositive/list_bucket_dml_8.q.out @@ -69,6 +69,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -171,6 +190,40 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [srcpart] /srcpart/ds=2008-04-08/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -204,6 +257,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: 
string, string + Table: default.list_bucketing_dynamic_part + Is Table Level Stats: false PREHOOK: query: insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr) select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08' @@ -253,7 +311,7 @@ Database: default Table: list_bucketing_dynamic_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 16 rawDataSize 136 @@ -294,7 +352,7 @@ Database: default Table: list_bucketing_dynamic_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 6 numRows 984 rawDataSize 9488 @@ -409,7 +467,7 @@ STAGE PLANS: ds 2008-04-08 hr a1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git ql/src/test/results/clientpositive/list_bucket_dml_9.q.out ql/src/test/results/clientpositive/list_bucket_dml_9.q.out index 9df21303c0..495934fb39 100644 --- ql/src/test/results/clientpositive/list_bucket_dml_9.q.out +++ ql/src/test/results/clientpositive/list_bucket_dml_9.q.out @@ -69,6 +69,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -171,6 +190,40 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [srcpart] /srcpart/ds=2008-04-08/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + 
table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -204,6 +257,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_static_part + Is Table Level Stats: false PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') select key, value from srcpart where ds = '2008-04-08' @@ -249,7 +307,7 @@ Database: default Table: list_bucketing_static_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 6 numRows 1000 rawDataSize 9624 @@ -330,6 +388,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -432,6 +509,40 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [srcpart] /srcpart/ds=2008-04-08/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + 
properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -473,6 +584,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_static_part + Is Table Level Stats: false Stage: Stage-3 Merge File Operator @@ -622,7 +738,7 @@ Database: default Table: list_bucketing_static_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 4 numRows 1000 rawDataSize 9624 @@ -683,7 +799,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git ql/src/test/results/clientpositive/list_bucket_query_multiskew_1.q.out ql/src/test/results/clientpositive/list_bucket_query_multiskew_1.q.out index a1f225cb45..0dc5493929 100644 --- ql/src/test/results/clientpositive/list_bucket_query_multiskew_1.q.out +++ ql/src/test/results/clientpositive/list_bucket_query_multiskew_1.q.out @@ -45,7 +45,7 @@ Database: default Table: fact_daily #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 3 numRows 500 rawDataSize 5312 @@ -95,7 +95,7 @@ STAGE PLANS: ds 1 hr 4 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -179,7 +179,7 @@ STAGE PLANS: ds 1 hr 4 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -264,7 +264,7 @@ STAGE PLANS: ds 1 hr 4 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -347,7 +347,7 @@ STAGE PLANS: ds 1 hr 4 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out index 0ad2ff56f4..3aefb55a34 100644 --- ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out +++ ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out @@ -45,7 +45,7 @@ Database: default Table: fact_daily #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE 
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 3 numRows 500 rawDataSize 5312 @@ -95,7 +95,7 @@ STAGE PLANS: ds 1 hr 4 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -179,7 +179,7 @@ STAGE PLANS: ds 1 hr 4 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -266,7 +266,7 @@ STAGE PLANS: ds 1 hr 4 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git ql/src/test/results/clientpositive/list_bucket_query_multiskew_3.q.out ql/src/test/results/clientpositive/list_bucket_query_multiskew_3.q.out index 02fc314e8c..13073f44bf 100644 --- ql/src/test/results/clientpositive/list_bucket_query_multiskew_3.q.out +++ ql/src/test/results/clientpositive/list_bucket_query_multiskew_3.q.out @@ -41,7 +41,7 @@ Database: default Table: fact_daily #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -99,7 +99,7 @@ Database: default Table: fact_daily #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 3 numRows 500 rawDataSize 5312 @@ -162,7 +162,7 @@ Database: default Table: fact_daily #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 500 rawDataSize 5312 @@ -205,7 +205,7 @@ STAGE PLANS: ds 1 hr 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -316,7 +316,7 @@ STAGE PLANS: ds 1 hr 2 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -402,7 +402,7 @@ STAGE PLANS: ds 1 hr 3 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git ql/src/test/results/clientpositive/llap/acid_bucket_pruning.q.out ql/src/test/results/clientpositive/llap/acid_bucket_pruning.q.out index 86a9bf5455..77bea79574 100644 --- ql/src/test/results/clientpositive/llap/acid_bucket_pruning.q.out +++ ql/src/test/results/clientpositive/llap/acid_bucket_pruning.q.out @@ -86,7 +86,6 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {} bucket_count 16 bucket_field_name a column.name.delimiter , @@ -110,7 +109,6 @@ STAGE PLANS: input format: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {} bucket_count 16 bucket_field_name a column.name.delimiter , diff --git ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out index 9b83fad8cd..5fe7544fd8 100644 --- ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out +++ ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out @@ -1471,10 +1471,11 @@ STAGE PLANS: Tez Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Map 7 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) + Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 3 (SIMPLE_EDGE) Reducer 6 <- Reducer 3 (SIMPLE_EDGE) + Reducer 7 <- Reducer 3 (SIMPLE_EDGE) Vertices: Map 1 Map Operator Tree: @@ -1517,7 +1518,7 @@ STAGE PLANS: partitionColumnCount: 2 partitionColumns: ds:string, hr:string scratchColumnTypeNames: [] - Map 7 + Map 8 Map Operator Tree: TableScan Vectorization: native: true @@ -1661,6 +1662,14 @@ STAGE PLANS: notVectorizedReason: Key expression for GROUPBY operator: Vectorizing complex type STRUCT not supported vectorized: false Reduce Operator Tree: + Reducer 7 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reduce Operator Tree: Stage: Stage-5 @@ -2246,11 +2255,12 @@ STAGE PLANS: Tez Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) + Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 3 (SIMPLE_EDGE) Reducer 6 <- Reducer 3 (SIMPLE_EDGE) Reducer 7 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 8 <- Reducer 7 (SIMPLE_EDGE) Vertices: Map 1 Map Operator Tree: @@ -2293,7 +2303,7 @@ STAGE PLANS: partitionColumnCount: 2 partitionColumns: ds:string, hr:string scratchColumnTypeNames: [] - Map 8 + Map 9 Map Operator Tree: TableScan Vectorization: native: true @@ -2438,28 +2448,21 @@ STAGE PLANS: vectorized: false Reduce Operator Tree: Reducer 7 - Execution mode: vectorized, llap + Execution mode: llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: - reduceColumnSortOrder: - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 0 - partitionColumnCount: 0 - scratchColumnTypeNames: [string, string, string, string] + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reduce Operator Tree: + Reducer 8 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false Reduce Operator Tree: - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1, 2, 3] - selectExpressions: ConstantVectorExpression(val this) -> 0:string, ConstantVectorExpression(val 
should) -> 1:string, ConstantVectorExpression(val not) -> 2:string, ConstantVectorExpression(val be there) -> 3:string - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Stage: Stage-5 diff --git ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out index 1a2aff7bf3..dbd9797ce4 100644 --- ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out +++ ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out @@ -427,18 +427,18 @@ STAGE PLANS: TableScan alias: over10k_orc_bucketed filterExpr: ((b = 4294967363) and (t < 100)) (type: boolean) - Statistics: Num rows: 2098 Data size: 39900 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2098 Data size: 41920 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((b = 4294967363) and (t < 100)) (type: boolean) - Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), si (type: smallint), i (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int) sort order: +++ - Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Reducer 2 @@ -447,10 +447,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey2 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -497,18 +497,18 @@ STAGE PLANS: TableScan alias: over10k_orc_bucketed filterExpr: ((b = 4294967363) and (t < 100)) (type: boolean) - Statistics: Num rows: 2098 Data size: 39900 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2098 Data size: 41920 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((b = 4294967363) and (t < 100)) (type: boolean) - Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ROW__ID (type: struct), t (type: tinyint), si (type: smallint), i (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: struct) sort order: + - Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data 
size: 176 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: tinyint), _col2 (type: smallint), _col3 (type: int) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) @@ -518,10 +518,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -570,19 +570,19 @@ STAGE PLANS: TableScan alias: over10k_orc_bucketed filterExpr: ((b = 4294967363) and (t < 100)) (type: boolean) - Statistics: Num rows: 2098 Data size: 1021440 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2098 Data size: 706986 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((b = 4294967363) and (t < 100)) (type: boolean) - Statistics: Num rows: 1 Data size: 486 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 674 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ROW__ID (type: struct), t (type: tinyint), si (type: smallint), f (type: float), d (type: double), bo (type: boolean), s (type: string), ts (type: timestamp), dec (type: decimal(4,2)), bin (type: binary) outputColumnNames: _col0, _col1, _col2, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 1 Data size: 486 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 834 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: struct) sort order: + Map-reduce partition columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 1 Data size: 486 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 834 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: tinyint), _col2 (type: smallint), _col5 (type: float), _col6 (type: double), _col7 (type: boolean), _col8 (type: string), _col9 (type: timestamp), _col10 (type: decimal(4,2)), _col11 (type: binary) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) @@ -592,10 +592,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), 0 (type: int), 4294967363 (type: bigint), VALUE._col3 (type: float), VALUE._col4 (type: double), VALUE._col5 (type: boolean), VALUE._col6 (type: string), VALUE._col7 (type: timestamp), VALUE._col8 (type: decimal(4,2)), VALUE._col9 (type: binary) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 1 Data size: 486 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 834 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 486 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 834 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat 
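
The Statistics rewrites in this file are the downstream effect of the same change: once column stats exist, the planner reports Column stats: COMPLETE and derives row counts and data sizes from the gathered per-column statistics (NDV, null counts, average lengths) rather than the rawDataSize heuristic. That is why the filtered estimate above moves from 1 row to 2 rows, with sizes recomputed from the projected column types. A minimal sketch for inspecting the stats behind these estimates follows; over10k_orc_bucketed is the harness table from this test, and the ORDER BY mirrors the q-file's sorted output:

    -- Let the planner cost operators from column statistics.
    SET hive.stats.fetch.column.stats=true;

    -- Per-column min/max, NDV, null count, and average length for column t:
    DESCRIBE FORMATTED over10k_orc_bucketed t;

    -- Roughly the query planned in the hunks above; with stats loaded the
    -- Filter/Select operators report Column stats: COMPLETE.
    EXPLAIN
    SELECT t, si, i FROM over10k_orc_bucketed
    WHERE b = 4294967363 AND t < 100
    ORDER BY t, si, i;
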
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat diff --git ql/src/test/results/clientpositive/llap/alter_merge_stats_orc.q.out ql/src/test/results/clientpositive/llap/alter_merge_stats_orc.q.out index 2b069ecec7..c87a0a64f2 100644 --- ql/src/test/results/clientpositive/llap/alter_merge_stats_orc.q.out +++ ql/src/test/results/clientpositive/llap/alter_merge_stats_orc.q.out @@ -88,7 +88,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 3 numRows 1500 rawDataSize 141000 @@ -237,7 +237,7 @@ Database: default Table: src_orc_merge_test_part_stat #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 3 numRows 1500 rawDataSize 141000 @@ -284,7 +284,7 @@ Database: default Table: src_orc_merge_test_part_stat #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 3 numRows 1500 rawDataSize 141000 diff --git ql/src/test/results/clientpositive/llap/auto_join1.q.out ql/src/test/results/clientpositive/llap/auto_join1.q.out index bbe63e24d4..9146560e36 100644 --- ql/src/test/results/clientpositive/llap/auto_join1.q.out +++ ql/src/test/results/clientpositive/llap/auto_join1.q.out @@ -26,6 +26,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -81,8 +82,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 809 Data size: 76855 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -100,6 +129,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest_j1 PREHOOK: query: FROM src src1 
JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value diff --git ql/src/test/results/clientpositive/llap/auto_smb_mapjoin_14.q.out ql/src/test/results/clientpositive/llap/auto_smb_mapjoin_14.q.out index 80a74fbfdf..014f720981 100644 --- ql/src/test/results/clientpositive/llap/auto_smb_mapjoin_14.q.out +++ ql/src/test/results/clientpositive/llap/auto_smb_mapjoin_14.q.out @@ -64,40 +64,40 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap Reducer 2 @@ -107,10 +107,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -174,25 +174,25 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE 
Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 @@ -200,17 +200,17 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap Reducer 2 Execution mode: llap @@ -219,17 +219,17 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: llap @@ -238,10 +238,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -330,25 +330,25 @@ STAGE PLANS: Map Operator 
Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 @@ -356,43 +356,43 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap Map 5 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num 
rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 @@ -400,18 +400,18 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap Reducer 2 @@ -422,12 +422,12 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 3 Execution mode: llap @@ -439,14 +439,14 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 5 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col3 (type: bigint), _col1 (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -459,12 +459,12 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Stage: Stage-0 @@ -547,40 +547,40 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: 
COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap Reducer 2 @@ -590,10 +590,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -664,40 +664,40 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator 
expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap Reducer 2 @@ -707,10 +707,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -805,40 +805,40 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data 
size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap Reducer 2 @@ -848,10 +848,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -934,40 +934,40 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 8) (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 8) (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap Reducer 2 @@ -977,10 +977,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1042,38 +1042,38 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (key + 1) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (key + 1) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1085,15 +1085,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: llap @@ -1102,10 +1102,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1162,40 +1162,40 @@ STAGE PLANS: Map 
Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap Reducer 2 @@ -1205,10 +1205,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1271,36 +1271,36 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + 
Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 @@ -1309,15 +1309,15 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) 2 _col0 (type: int) - Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap Reducer 2 @@ -1327,10 +1327,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1409,40 +1409,40 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: 
COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap Reducer 2 @@ -1452,10 +1452,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1540,30 +1540,34 @@ STAGE PLANS: Stage: Stage-2 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 1880 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 1880 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 1880 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 1880 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 1880 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 1880 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition 
map: Inner Join 0 to 1 @@ -1571,32 +1575,88 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 2912 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 2912 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 1488 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 1488 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 16 Data size: 1488 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) File Output Operator compressed: false - Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 2912 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 16 Data size: 2912 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Execution mode: llap + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + 
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -1614,6 +1674,10 @@ STAGE PLANS: Stage: Stage-4 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 Stage: Stage-1 Move Operator @@ -1628,6 +1692,10 @@ STAGE PLANS: Stage: Stage-5 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, string, string + Table: default.dest2 PREHOOK: query: from ( select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key @@ -1757,32 +1825,34 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 1880 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 1880 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 1880 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 @@ -1790,53 +1860,109 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 1488 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 1488 Basic stats: COMPLETE Column stats: 
COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 16 Data size: 1488 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 1488 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 940 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 940 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 940 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: 
COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -1854,6 +1980,10 @@ STAGE PLANS: Stage: Stage-4 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 Stage: Stage-1 Move Operator @@ -1868,6 +1998,10 @@ STAGE PLANS: Stage: Stage-5 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest2 PREHOOK: query: from ( select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key diff --git ql/src/test/results/clientpositive/llap/auto_sortmerge_join_10.q.out ql/src/test/results/clientpositive/llap/auto_sortmerge_join_10.q.out index b8f10fec67..c1459d53ef 100644 --- ql/src/test/results/clientpositive/llap/auto_sortmerge_join_10.q.out +++ ql/src/test/results/clientpositive/llap/auto_sortmerge_join_10.q.out @@ -80,14 +80,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -96,15 +96,15 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 5 - Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -112,14 +112,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE 
Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -128,15 +128,15 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 5 - Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -144,24 +144,24 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 3 @@ -171,10 +171,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -248,34 +248,34 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: 
COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: key (type: int) mode: final outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 2 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -284,15 +284,15 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 0 Map 1 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -303,10 +303,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/auto_sortmerge_join_14.q.out ql/src/test/results/clientpositive/llap/auto_sortmerge_join_14.q.out index 2d03e8cb72..72d2c62e5b 100644 --- ql/src/test/results/clientpositive/llap/auto_sortmerge_join_14.q.out +++ ql/src/test/results/clientpositive/llap/auto_sortmerge_join_14.q.out @@ -57,11 +57,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - 
Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join 0 to 1 @@ -70,15 +70,15 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -86,16 +86,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -105,10 +105,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -164,27 +164,27 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 2 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 189 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 189 Data size: 756 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 189 Data 
size: 724 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 189 Data size: 756 Basic stats: COMPLETE Column stats: COMPLETE
                   Map Join Operator
                     condition map:
                          Right Outer Join 0 to 1
@@ -193,15 +193,15 @@ STAGE PLANS:
                       1 _col0 (type: int)
                     input vertices:
                       0 Map 1
-                    Statistics: Num rows: 207 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 189 Data size: 1512 Basic stats: COMPLETE Column stats: COMPLETE
                     Group By Operator
                       aggregations: count()
                       mode: hash
                       outputColumnNames: _col0
-                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         sort order: 
-                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                         value expressions: _col0 (type: bigint)
             Execution mode: llap
             LLAP IO: no inputs
@@ -212,10 +212,10 @@ STAGE PLANS:
                 aggregations: count(VALUE._col0)
                 mode: mergepartial
                 outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/llap/auto_sortmerge_join_15.q.out ql/src/test/results/clientpositive/llap/auto_sortmerge_join_15.q.out
index ce41569f49..91b1d8ea67 100644
--- ql/src/test/results/clientpositive/llap/auto_sortmerge_join_15.q.out
+++ ql/src/test/results/clientpositive/llap/auto_sortmerge_join_15.q.out
@@ -57,11 +57,11 @@ STAGE PLANS:
           Map Operator Tree:
               TableScan
                 alias: a
-                Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
                 Select Operator
                   expressions: key (type: int)
                   outputColumnNames: _col0
-                  Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
                   Map Join Operator
                     condition map:
                          Left Outer Join 0 to 1
@@ -70,15 +70,15 @@ STAGE PLANS:
                       1 _col0 (type: int)
                     input vertices:
                       1 Map 3
-                    Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE
                     Group By Operator
                       aggregations: count()
                       mode: hash
                       outputColumnNames: _col0
-                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         sort order: 
-                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                         value expressions: _col0 (type: bigint)
             Execution mode: llap
             LLAP IO: no inputs
@@ -86,16 +86,16 @@ STAGE PLANS:
           Map Operator Tree:
              TableScan
                alias: b
-                Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
                  expressions: key (type: int)
                  outputColumnNames: _col0
-                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    key expressions: _col0 (type: int)
                    sort order: +
                    Map-reduce partition columns: _col0 (type: int)
-                    Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
        Reducer 2
@@ -105,10 +105,10 @@ STAGE PLANS:
                aggregations: count(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -143,27 +143,27 @@ STAGE PLANS:
          Map Operator Tree:
              TableScan
                alias: a
-                Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
                  expressions: key (type: int)
                  outputColumnNames: _col0
-                  Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    key expressions: _col0 (type: int)
                    sort order: +
                    Map-reduce partition columns: _col0 (type: int)
-                    Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
        Map 2
          Map Operator Tree:
              TableScan
                alias: b
-                Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
                  expressions: key (type: int)
                  outputColumnNames: _col0
-                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                  Map Join Operator
                    condition map:
                         Right Outer Join 0 to 1
@@ -172,15 +172,15 @@ STAGE PLANS:
                      1 _col0 (type: int)
                    input vertices:
                      0 Map 1
-                    Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 15 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE
                    Group By Operator
                      aggregations: count()
                      mode: hash
                      outputColumnNames: _col0
-                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        sort order: 
-                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                        value expressions: _col0 (type: bigint)
            Execution mode: llap
            LLAP IO: no inputs
@@ -191,10 +191,10 @@ STAGE PLANS:
                aggregations: count(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/llap/auto_sortmerge_join_6.q.out ql/src/test/results/clientpositive/llap/auto_sortmerge_join_6.q.out
index 60c35fa21a..569b308470 100644
--- ql/src/test/results/clientpositive/llap/auto_sortmerge_join_6.q.out
+++ ql/src/test/results/clientpositive/llap/auto_sortmerge_join_6.q.out
@@ -84,66 +84,63 @@ STAGE PLANS:
 #### A masked pattern was here ####
       Edges:
         Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
-        Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
-        Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1
            Map Operator Tree:
                TableScan
-                  alias: c
-                  Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
+                  alias: b
+                  Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
-                    predicate: value is not null (type: boolean)
-                    Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
-                      expressions: value (type: string)
+                      expressions: key (type: int)
                      outputColumnNames: _col0
-                      Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: string)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
-            Execution mode: llap
-            LLAP IO: no inputs
-        Map 5
+                      Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
            Map Operator Tree:
                TableScan
                  alias: a
-                  Statistics: Num rows: 500 Data size: 89488 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: (key is not null and value is not null) (type: boolean)
-                    Statistics: Num rows: 450 Data size: 80539 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int), value (type: string)
                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 450 Data size: 80539 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col1 (type: string)
-                        sort order: +
-                        Map-reduce partition columns: _col1 (type: string)
-                        Statistics: Num rows: 450 Data size: 80539 Basic stats: COMPLETE Column stats: NONE
-                        value expressions: _col0 (type: int)
+                      Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
+                      Merge Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                        keys:
+                          0 _col0 (type: int)
+                          1 _col0 (type: int)
+                        outputColumnNames: _col1
+                        Statistics: Num rows: 809 Data size: 73619 Basic stats: COMPLETE Column stats: COMPLETE
+                        Reduce Output Operator
+                          key expressions: _col1 (type: string)
+                          sort order: +
+                          Map-reduce partition columns: _col1 (type: string)
+                          Statistics: Num rows: 809 Data size: 73619 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
-            LLAP IO: no inputs
-        Map 6
+        Map 5
            Map Operator Tree:
                TableScan
-                  alias: b
-                  Statistics: Num rows: 500 Data size: 1904 Basic stats: COMPLETE Column stats: NONE
+                  alias: c
+                  Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE
+                    predicate: value is not null (type: boolean)
+                    Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
-                      expressions: key (type: int)
+                      expressions: value (type: string)
                      outputColumnNames: _col0
-                      Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
-                        key expressions: _col0 (type: int)
+                        key expressions: _col0 (type: string)
                        sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
        Reducer 2
@@ -153,45 +150,29 @@ STAGE PLANS:
                condition map:
                     Inner Join 0 to 1
                keys:
-                  0 _col0 (type: string)
-                  1 _col1 (type: string)
-                outputColumnNames: _col1
-                Statistics: Num rows: 550 Data size: 50050 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col1 (type: int)
-                  sort order: +
-                  Map-reduce partition columns: _col1 (type: int)
-                  Statistics: Num rows: 550 Data size: 50050 Basic stats: COMPLETE Column stats: NONE
-        Reducer 3
-            Execution mode: llap
-            Reduce Operator Tree:
-              Merge Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                keys:
-                  0 _col1 (type: int)
-                  1 _col0 (type: int)
-                Statistics: Num rows: 605 Data size: 55055 Basic stats: COMPLETE Column stats: NONE
+                  0 _col1 (type: string)
+                  1 _col0 (type: string)
+                Statistics: Num rows: 1309 Data size: 10472 Basic stats: COMPLETE Column stats: COMPLETE
                Group By Operator
                  aggregations: count()
                  mode: hash
                  outputColumnNames: _col0
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    sort order: 
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                    value expressions: _col0 (type: bigint)
-        Reducer 4
+        Reducer 3
            Execution mode: llap
            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -230,66 +211,63 @@ STAGE PLANS:
 #### A masked pattern was here ####
       Edges:
         Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
-        Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
-        Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1
            Map Operator Tree:
                TableScan
-                  alias: d
-                  Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
+                  alias: b
+                  Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
-                    predicate: value is not null (type: boolean)
-                    Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
-                      expressions: value (type: string)
+                      expressions: key (type: int)
                      outputColumnNames: _col0
-                      Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: string)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
-            Execution mode: llap
-            LLAP IO: no inputs
-        Map 5
+                      Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
            Map Operator Tree:
                TableScan
                  alias: a
-                  Statistics: Num rows: 500 Data size: 89488 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: (key is not null and value is not null) (type: boolean)
-                    Statistics: Num rows: 450 Data size: 80539 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int), value (type: string)
                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 450 Data size: 80539 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col1 (type: string)
-                        sort order: +
-                        Map-reduce partition columns: _col1 (type: string)
-                        Statistics: Num rows: 450 Data size: 80539 Basic stats: COMPLETE Column stats: NONE
-                        value expressions: _col0 (type: int)
+                      Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
+                      Merge Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                        keys:
+                          0 _col0 (type: int)
+                          1 _col0 (type: int)
+                        outputColumnNames: _col1
+                        Statistics: Num rows: 809 Data size: 73619 Basic stats: COMPLETE Column stats: COMPLETE
+                        Reduce Output Operator
+                          key expressions: _col1 (type: string)
+                          sort order: +
+                          Map-reduce partition columns: _col1 (type: string)
+                          Statistics: Num rows: 809 Data size: 73619 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
-            LLAP IO: no inputs
-        Map 6
+        Map 5
            Map Operator Tree:
                TableScan
-                  alias: b
-                  Statistics: Num rows: 500 Data size: 1904 Basic stats: COMPLETE Column stats: NONE
+                  alias: d
+                  Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE
+                    predicate: value is not null (type: boolean)
+                    Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
-                      expressions: key (type: int)
+                      expressions: value (type: string)
                      outputColumnNames: _col0
-                      Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
-                        key expressions: _col0 (type: int)
+                        key expressions: _col0 (type: string)
                        sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
        Reducer 2
@@ -299,45 +277,29 @@ STAGE PLANS:
                condition map:
                     Inner Join 0 to 1
                keys:
-                  0 _col0 (type: string)
-                  1 _col1 (type: string)
-                outputColumnNames: _col1
-                Statistics: Num rows: 550 Data size: 50050 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col1 (type: int)
-                  sort order: +
-                  Map-reduce partition columns: _col1 (type: int)
-                  Statistics: Num rows: 550 Data size: 50050 Basic stats: COMPLETE Column stats: NONE
-        Reducer 3
-            Execution mode: llap
-            Reduce Operator Tree:
-              Merge Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                keys:
-                  0 _col1 (type: int)
-                  1 _col0 (type: int)
-                Statistics: Num rows: 605 Data size: 55055 Basic stats: COMPLETE Column stats: NONE
+                  0 _col1 (type: string)
+                  1 _col0 (type: string)
+                Statistics: Num rows: 1309 Data size: 10472 Basic stats: COMPLETE Column stats: COMPLETE
                Group By Operator
                  aggregations: count()
                  mode: hash
                  outputColumnNames: _col0
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    sort order: 
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                    value expressions: _col0 (type: bigint)
-        Reducer 4
+        Reducer 3
            Execution mode: llap
            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -376,66 +338,63 @@ STAGE PLANS:
 #### A masked pattern was here ####
       Edges:
         Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
-        Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
-        Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1
            Map Operator Tree:
                TableScan
-                  alias: h
-                  Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
+                  alias: b
+                  Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
-                    predicate: value is not null (type: boolean)
-                    Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
-                      expressions: value (type: string)
+                      expressions: key (type: int)
                      outputColumnNames: _col0
-                      Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: string)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
-            Execution mode: llap
-            LLAP IO: no inputs
-        Map 5
+                      Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
            Map Operator Tree:
                TableScan
                  alias: a
-                  Statistics: Num rows: 500 Data size: 89488 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: (key is not null and value is not null) (type: boolean)
-                    Statistics: Num rows: 450 Data size: 80539 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int), value (type: string)
                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 450 Data size: 80539 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col1 (type: string)
-                        sort order: +
-                        Map-reduce partition columns: _col1 (type: string)
-                        Statistics: Num rows: 450 Data size: 80539 Basic stats: COMPLETE Column stats: NONE
-                        value expressions: _col0 (type: int)
+                      Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
+                      Merge Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                        keys:
+                          0 _col0 (type: int)
+                          1 _col0 (type: int)
+                        outputColumnNames: _col1
+                        Statistics: Num rows: 809 Data size: 73619 Basic stats: COMPLETE Column stats: COMPLETE
+                        Reduce Output Operator
+                          key expressions: _col1 (type: string)
+                          sort order: +
+                          Map-reduce partition columns: _col1 (type: string)
+                          Statistics: Num rows: 809 Data size: 73619 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
-            LLAP IO: no inputs
-        Map 6
+        Map 5
            Map Operator Tree:
                TableScan
-                  alias: b
-                  Statistics: Num rows: 500 Data size: 1904 Basic stats: COMPLETE Column stats: NONE
+                  alias: h
+                  Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE
+                    predicate: value is not null (type: boolean)
+                    Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
-                      expressions: key (type: int)
+                      expressions: value (type: string)
                      outputColumnNames: _col0
-                      Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
-                        key expressions: _col0 (type: int)
+                        key expressions: _col0 (type: string)
                        sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
        Reducer 2
@@ -445,45 +404,29 @@ STAGE PLANS:
                condition map:
                     Inner Join 0 to 1
                keys:
-                  0 _col0 (type: string)
-                  1 _col1 (type: string)
-                outputColumnNames: _col1
-                Statistics: Num rows: 550 Data size: 50050 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col1 (type: int)
-                  sort order: +
-                  Map-reduce partition columns: _col1 (type: int)
-                  Statistics: Num rows: 550 Data size: 50050 Basic stats: COMPLETE Column stats: NONE
-        Reducer 3
-            Execution mode: llap
-            Reduce Operator Tree:
-              Merge Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                keys:
-                  0 _col1 (type: int)
-                  1 _col0 (type: int)
-                Statistics: Num rows: 605 Data size: 55055 Basic stats: COMPLETE Column stats: NONE
+                  0 _col1 (type: string)
+                  1 _col0 (type: string)
+                Statistics: Num rows: 1309 Data size: 10472 Basic stats: COMPLETE Column stats: COMPLETE
                Group By Operator
                  aggregations: count()
                  mode: hash
                  outputColumnNames: _col0
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    sort order: 
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                    value expressions: _col0 (type: bigint)
-        Reducer 4
+        Reducer 3
            Execution mode: llap
            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -522,66 +465,63 @@ STAGE PLANS:
 #### A masked pattern was here ####
       Edges:
         Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
-        Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
-        Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1
            Map Operator Tree:
                TableScan
-                  alias: c
-                  Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+                  alias: b
+                  Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
-                      expressions: key (type: string)
+                      expressions: key (type: int)
                      outputColumnNames: _col0
-                      Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
-                      Reduce Output Operator
-                        key expressions: UDFToDouble(_col0) (type: double)
-                        sort order: +
-                        Map-reduce partition columns: UDFToDouble(_col0) (type: double)
-                        Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
-            Execution mode: llap
-            LLAP IO: no inputs
-        Map 5
+                      Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
            Map Operator Tree:
                TableScan
                  alias: a
-                  Statistics: Num rows: 500 Data size: 1904 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int)
                      outputColumnNames: _col0
-                      Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: UDFToDouble(_col0) (type: double)
-                        sort order: +
-                        Map-reduce partition columns: UDFToDouble(_col0) (type: double)
-                        Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE
-                        value expressions: _col0 (type: int)
+                      Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
+                      Merge Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                        keys:
+                          0 _col0 (type: int)
+                          1 _col0 (type: int)
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 809 Data size: 3236 Basic stats: COMPLETE Column stats: COMPLETE
+                        Reduce Output Operator
+                          key expressions: UDFToDouble(_col0) (type: double)
+                          sort order: +
+                          Map-reduce partition columns: UDFToDouble(_col0) (type: double)
+                          Statistics: Num rows: 809 Data size: 3236 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
-            LLAP IO: no inputs
-        Map 6
+        Map 5
            Map Operator Tree:
                TableScan
-                  alias: b
-                  Statistics: Num rows: 500 Data size: 1904 Basic stats: COMPLETE Column stats: NONE
+                  alias: c
+                  Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
-                      expressions: key (type: int)
+                      expressions: key (type: string)
                      outputColumnNames: _col0
-                      Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
-                        key expressions: _col0 (type: int)
+                        key expressions: UDFToDouble(_col0) (type: double)
                        sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE
+                        Map-reduce partition columns: UDFToDouble(_col0) (type: double)
+                        Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
        Reducer 2
@@ -593,43 +533,27 @@ STAGE PLANS:
                keys:
                  0 UDFToDouble(_col0) (type: double)
                  1 UDFToDouble(_col0) (type: double)
-                outputColumnNames: _col1
-                Statistics: Num rows: 550 Data size: 47850 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col1 (type: int)
-                  sort order: +
-                  Map-reduce partition columns: _col1 (type: int)
-                  Statistics: Num rows: 550 Data size: 47850 Basic stats: COMPLETE Column stats: NONE
-        Reducer 3
-            Execution mode: llap
-            Reduce Operator Tree:
-              Merge Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                keys:
-                  0 _col1 (type: int)
-                  1 _col0 (type: int)
-                Statistics: Num rows: 605 Data size: 52635 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1309 Data size: 10472 Basic stats: COMPLETE Column stats: COMPLETE
                Group By Operator
                  aggregations: count()
                  mode: hash
                  outputColumnNames: _col0
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    sort order: 
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                    value expressions: _col0 (type: bigint)
-        Reducer 4
+        Reducer 3
            Execution mode: llap
            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -674,36 +598,36 @@ STAGE PLANS:
          Map Operator Tree:
              TableScan
                alias: b
-                Statistics: Num rows: 500 Data size: 1904 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
                Filter Operator
                  predicate: key is not null (type: boolean)
-                  Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: key (type: int)
                    outputColumnNames: _col0
-                    Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
          Map Operator Tree:
              TableScan
                alias: c
-                Statistics: Num rows: 500 Data size: 1904 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
                Filter Operator
                  predicate: key is not null (type: boolean)
-                  Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: key (type: int)
                    outputColumnNames: _col0
-                    Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
          Map Operator Tree:
              TableScan
                alias: a
-                Statistics: Num rows: 500 Data size: 1904 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
                Filter Operator
                  predicate: key is not null (type: boolean)
-                  Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: key (type: int)
                    outputColumnNames: _col0
-                    Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
                    Merge Join Operator
                      condition map:
                           Inner Join 0 to 1
@@ -712,15 +636,15 @@ STAGE PLANS:
                        0 _col0 (type: int)
                        1 _col0 (type: int)
                        2 _col0 (type: int)
-                      Statistics: Num rows: 1045 Data size: 3977 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1309 Data size: 10472 Basic stats: COMPLETE Column stats: COMPLETE
                      Group By Operator
                        aggregations: count()
                        mode: hash
                        outputColumnNames: _col0
-                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                        Reduce Output Operator
                          sort order: 
-                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                          value expressions: _col0 (type: bigint)
            Execution mode: llap
        Reducer 2
@@ -730,10 +654,10 @@ STAGE PLANS:
                aggregations: count(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -778,57 +702,57 @@ STAGE PLANS:
        Map 1
          Map Operator Tree:
              TableScan
-                alias: a
-                Statistics: Num rows: 500 Data size: 89488 Basic stats: COMPLETE Column stats: NONE
-                Filter Operator
-                  predicate: (key is not null and value is not null) (type: boolean)
-                  Statistics: Num rows: 450 Data size: 80539 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: key (type: int), value (type: string)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 450 Data size: 80539 Basic stats: COMPLETE Column stats: NONE
-          Map Operator Tree:
-              TableScan
                alias: b
-                Statistics: Num rows: 500 Data size: 1904 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
                Filter Operator
                  predicate: key is not null (type: boolean)
-                  Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: key (type: int)
                    outputColumnNames: _col0
-                    Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
+          Map Operator Tree:
+              TableScan
+                alias: a
+                Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
+                Filter Operator
+                  predicate: (key is not null and value is not null) (type: boolean)
+                  Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: key (type: int), value (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
                    Merge Join Operator
                      condition map:
                           Inner Join 0 to 1
                      keys:
                        0 _col0 (type: int)
                        1 _col0 (type: int)
-                      outputColumnNames: _col2
-                      Statistics: Num rows: 522 Data size: 1988 Basic stats: COMPLETE Column stats: NONE
+                      outputColumnNames: _col1
+                      Statistics: Num rows: 809 Data size: 73619 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
-                        key expressions: _col2 (type: string)
+                        key expressions: _col1 (type: string)
                        sort order: +
-                        Map-reduce partition columns: _col2 (type: string)
-                        Statistics: Num rows: 522 Data size: 1988 Basic stats: COMPLETE Column stats: NONE
+                        Map-reduce partition columns: _col1 (type: string)
+                        Statistics: Num rows: 809 Data size: 73619 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
        Map 5
          Map Operator Tree:
              TableScan
                alias: c
-                Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
                Filter Operator
                  predicate: value is not null (type: boolean)
-                  Statistics: Num rows: 475 Data size: 83204 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: value (type: string)
                    outputColumnNames: _col0
-                    Statistics: Num rows: 475 Data size: 83204 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
                    Reduce Output Operator
                      key expressions: _col0 (type: string)
                      sort order: +
                      Map-reduce partition columns: _col0 (type: string)
-                      Statistics: Num rows: 475 Data size: 83204 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
        Reducer 2
@@ -838,17 +762,17 @@ STAGE PLANS:
                condition map:
                     Inner Join 0 to 1
                keys:
-                  0 _col2 (type: string)
+                  0 _col1 (type: string)
                  1 _col0 (type: string)
-                Statistics: Num rows: 574 Data size: 2186 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1309 Data size: 10472 Basic stats: COMPLETE Column stats: COMPLETE
                Group By Operator
                  aggregations: count()
                  mode: hash
                  outputColumnNames: _col0
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    sort order: 
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                    value expressions: _col0 (type: bigint)
        Reducer 3
            Execution mode: llap
@@ -857,10 +781,10 @@ STAGE PLANS:
                aggregations: count(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -899,66 +823,63 @@ STAGE PLANS:
 #### A masked pattern was here ####
       Edges:
         Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
-        Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
-        Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1
            Map Operator Tree:
                TableScan
-                  alias: c
-                  Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
+                  alias: b
+                  Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
-                    predicate: value is not null (type: boolean)
-                    Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
-                      expressions: value (type: string)
+                      expressions: key (type: int)
                      outputColumnNames: _col0
-                      Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: string)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
-            Execution mode: llap
-            LLAP IO: no inputs
-        Map 5
+                      Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
            Map Operator Tree:
                TableScan
                  alias: a
-                  Statistics: Num rows: 500 Data size: 89488 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: (key is not null and value is not null) (type: boolean)
-                    Statistics: Num rows: 450 Data size: 80539 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int), value (type: string)
                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 450 Data size: 80539 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col1 (type: string)
-                        sort order: +
-                        Map-reduce partition columns: _col1 (type: string)
-                        Statistics: Num rows: 450 Data size: 80539 Basic stats: COMPLETE Column stats: NONE
-                        value expressions: _col0 (type: int)
+                      Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
+                      Merge Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                        keys:
+                          0 _col0 (type: int)
+                          1 _col0 (type: int)
+                        outputColumnNames: _col1
+                        Statistics: Num rows: 809 Data size: 73619 Basic stats: COMPLETE Column stats: COMPLETE
+                        Reduce Output Operator
+                          key expressions: _col1 (type: string)
+                          sort order: +
+                          Map-reduce partition columns: _col1 (type: string)
+                          Statistics: Num rows: 809 Data size: 73619 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
-            LLAP IO: no inputs
-        Map 6
+        Map 5
            Map Operator Tree:
                TableScan
-                  alias: b
-                  Statistics: Num rows: 500 Data size: 1904 Basic stats: COMPLETE Column stats: NONE
+                  alias: c
+                  Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE
+                    predicate: value is not null (type: boolean)
+                    Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
-                      expressions: key (type: int)
+                      expressions: value (type: string)
                      outputColumnNames: _col0
-                      Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
-                        key expressions: _col0 (type: int)
+                        key expressions: _col0 (type: string)
                        sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
        Reducer 2
@@ -968,45 +889,29 @@ STAGE PLANS:
                condition map:
                     Inner Join 0 to 1
                keys:
-                  0 _col0 (type: string)
-                  1 _col1 (type: string)
-                outputColumnNames: _col1
-                Statistics: Num rows: 550 Data size: 50050 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col1 (type: int)
-                  sort order: +
-                  Map-reduce partition columns: _col1 (type: int)
-                  Statistics: Num rows: 550 Data size: 50050 Basic stats: COMPLETE Column stats: NONE
-        Reducer 3
-            Execution mode: llap
-            Reduce Operator Tree:
-              Merge Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                keys:
-                  0 _col1 (type: int)
-                  1 _col0 (type: int)
-                Statistics: Num rows: 605 Data size: 55055 Basic stats: COMPLETE Column stats: NONE
+                  0 _col1 (type: string)
+                  1 _col0 (type: string)
+                Statistics: Num rows: 1309 Data size: 10472 Basic stats: COMPLETE Column stats: COMPLETE
                Group By Operator
                  aggregations: count()
                  mode: hash
                  outputColumnNames: _col0
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    sort order: 
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                    value expressions: _col0 (type: bigint)
-        Reducer 4
+        Reducer 3
            Execution mode: llap
            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1045,66 +950,63 @@ STAGE PLANS:
 #### A masked pattern was here ####
       Edges:
         Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
-        Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
-        Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1
            Map Operator Tree:
                TableScan
-                  alias: c
-                  Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+                  alias: b
+                  Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
-                      expressions: key (type: string)
+                      expressions: key (type: int)
                      outputColumnNames: _col0
-                      Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
-                      Reduce Output Operator
-                        key expressions: UDFToDouble(_col0) (type: double)
-                        sort order: +
-                        Map-reduce partition columns: UDFToDouble(_col0) (type: double)
-                        Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
-            Execution mode: llap
-            LLAP IO: no inputs
-        Map 5
+                      Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
            Map Operator Tree:
                TableScan
                  alias: a
-                  Statistics: Num rows: 500 Data size: 1904 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int)
                      outputColumnNames: _col0
-                      Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: UDFToDouble(_col0) (type: double)
-                        sort order: +
-                        Map-reduce partition columns: UDFToDouble(_col0) (type: double)
-                        Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE
-                        value expressions: _col0 (type: int)
+                      Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
+                      Merge Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                        keys:
+                          0 _col0 (type: int)
+                          1 _col0 (type: int)
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 809 Data size: 3236 Basic stats: COMPLETE Column stats: COMPLETE
+                        Reduce Output Operator
+                          key expressions: UDFToDouble(_col0) (type: double)
+                          sort order: +
+                          Map-reduce partition columns: UDFToDouble(_col0) (type: double)
+                          Statistics: Num rows: 809 Data size: 3236 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
-            LLAP IO: no inputs
-        Map 6
+        Map 5
            Map Operator Tree:
                TableScan
-                  alias: b
-                  Statistics: Num rows: 500 Data size: 1904 Basic stats: COMPLETE Column stats: NONE
+                  alias: c
+                  Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
-                      expressions: key (type: int)
+                      expressions: key (type: string)
                      outputColumnNames: _col0
-                      Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
-                        key expressions: _col0 (type: int)
+                        key expressions: UDFToDouble(_col0) (type: double)
                        sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE
+                        Map-reduce partition columns: UDFToDouble(_col0) (type: double)
+                        Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
        Reducer 2
@@ -1116,43 +1018,27 @@ STAGE PLANS:
                keys:
                  0 UDFToDouble(_col0) (type: double)
                  1 UDFToDouble(_col0) (type: double)
-                outputColumnNames: _col1
-                Statistics: Num rows: 550 Data size: 47850 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col1 (type: int)
-                  sort order: +
-                  Map-reduce partition columns: _col1 (type: int)
-                  Statistics: Num rows: 550 Data size: 47850 Basic stats: COMPLETE Column stats: NONE
-        Reducer 3
-            Execution mode: llap
-            Reduce Operator Tree:
-              Merge Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                keys:
-                  0 _col1 (type: int)
-                  1 _col0 (type: int)
-                Statistics: Num rows: 605 Data size: 52635 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1309 Data size: 10472 Basic stats: COMPLETE Column stats: COMPLETE
                Group By Operator
                  aggregations: count()
                  mode: hash
                  outputColumnNames: _col0
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    sort order: 
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                    value expressions: _col0 (type: bigint)
-        Reducer 4
+        Reducer 3
            Execution mode: llap
            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1197,36 +1083,36 @@ STAGE PLANS:
          Map Operator Tree:
              TableScan
                alias: b
-                Statistics: Num rows: 500 Data size: 1904 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
                Filter Operator
                  predicate: key is not null (type: boolean)
-                  Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: key (type: int)
                    outputColumnNames: _col0
-                    Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
          Map Operator Tree:
              TableScan
                alias: c
-                Statistics: Num rows: 500 Data size: 1904 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
                Filter Operator
                  predicate: key is not null (type: boolean)
-                  Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: key (type: int)
                    outputColumnNames: _col0
-                    Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
          Map Operator Tree:
              TableScan
                alias: a
-                Statistics: Num rows: 500 Data size: 1904 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
                Filter Operator
                  predicate: key is not null (type: boolean)
-                  Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: key (type: int)
                    outputColumnNames: _col0
-                    Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
                    Merge Join Operator
                      condition map:
                           Inner Join 0 to 1
@@ -1235,15 +1121,15 @@ STAGE PLANS:
                        0 _col0 (type: int)
                        1 _col0 (type: int)
                        2 _col0 (type: int)
-                      Statistics: Num rows: 1045 Data size: 3977 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1309 Data size: 10472 Basic stats: COMPLETE Column stats: COMPLETE
                      Group By Operator
                        aggregations: count()
                        mode: hash
                        outputColumnNames: _col0
-                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                        Reduce Output Operator
                          sort order: 
-                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                          value expressions: _col0 (type: bigint)
            Execution mode: llap
        Reducer 2
@@ -1253,10 +1139,10 @@ STAGE PLANS:
                aggregations: count(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1301,57 +1187,57 @@ STAGE PLANS:
        Map 1
          Map Operator Tree:
              TableScan
-                alias: a
-                Statistics: Num rows: 500 Data size: 89488 Basic stats: COMPLETE Column stats: NONE
-                Filter Operator
-                  predicate: (key is not null and value is not null) (type: boolean)
-                  Statistics: Num rows: 450 Data size: 80539 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: key (type: int), value (type: string)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 450 Data size: 80539 Basic stats: COMPLETE Column stats: NONE
-          Map Operator Tree:
-              TableScan
                alias: b
-                Statistics: Num rows: 500 Data size: 1904 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
                Filter Operator
                  predicate: key is not null (type: boolean)
-                  Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: key (type: int)
                    outputColumnNames: _col0
-                    Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
+          Map Operator Tree:
+              TableScan
+                alias: a
+                Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
+                Filter Operator
+                  predicate: (key is not null and value is not null) (type: boolean)
+                  Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: key (type: int), value (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
                    Merge Join Operator
                      condition map:
                           Inner Join 0 to 1
                      keys:
                        0 _col0 (type: int)
                        1 _col0 (type: int)
-                      outputColumnNames: _col2
-                      Statistics: Num rows: 522 Data size: 1988 Basic stats: COMPLETE Column stats: NONE
+                      outputColumnNames: _col1
+                      Statistics: Num rows: 809 Data size: 73619 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
-                        key expressions: _col2 (type: string)
+                        key expressions: _col1 (type: string)
                        sort order: +
-                        Map-reduce partition columns: _col2 (type: string)
-                        Statistics: Num rows: 522 Data size: 1988 Basic stats: COMPLETE Column stats: NONE
+                        Map-reduce partition columns: _col1 (type: string)
+                        Statistics: Num rows: 809 Data size: 73619 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
        Map 5
          Map Operator Tree:
              TableScan
                alias: c
-                Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
                Filter Operator
                  predicate: value is not null (type: boolean)
-                  Statistics: Num rows: 475 Data size: 83204 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: value (type: string)
                    outputColumnNames: _col0
-                    Statistics: Num rows: 475 Data size: 83204 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
                    Reduce Output Operator
                      key expressions: _col0 (type: string)
                      sort order: +
                      Map-reduce partition columns: _col0 (type: string)
-                      Statistics: Num rows: 475 Data size: 83204 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
        Reducer 2
@@ -1361,17 +1247,17 @@ STAGE PLANS:
                condition map:
                     Inner Join 0 to 1
                keys:
-                  0 _col2 (type: string)
+                  0 _col1 (type: string)
                  1 _col0 (type: string)
-                Statistics: Num rows: 574 Data size: 2186 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1309 Data size: 10472 Basic stats: COMPLETE Column stats: COMPLETE
                Group By Operator
                  aggregations: count()
                  mode: hash
                  outputColumnNames: _col0
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    sort order: 
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                    value expressions: _col0 (type: bigint)
        Reducer 3
            Execution mode: llap
@@ -1380,10 +1266,10 @@ STAGE PLANS:
                aggregations: count(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/llap/auto_sortmerge_join_9.q.out ql/src/test/results/clientpositive/llap/auto_sortmerge_join_9.q.out
index 8bd3d126c1..37c07afbf1 100644
--- ql/src/test/results/clientpositive/llap/auto_sortmerge_join_9.q.out
+++ ql/src/test/results/clientpositive/llap/auto_sortmerge_join_9.q.out
@@ -65,14 +65,14 @@ STAGE PLANS:
          Map Operator Tree:
              TableScan
                alias: a
-                Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                Filter Operator
                  predicate: key is not null (type: boolean)
-                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: key (type: int)
                    outputColumnNames: _col0
-                    Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                    Map Join Operator
                      condition map:
                           Inner Join 0 to 1
@@ -81,15 +81,15 @@ STAGE PLANS:
                        1 _col0 (type: int)
                      input vertices:
                        1 Map 3
-                      Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE
                      Group By Operator
                        aggregations: count()
                        mode: hash
                        outputColumnNames: _col0
-                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                        Reduce Output Operator
                          sort order: 
-                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                          value expressions: _col0 (type: bigint)
            Execution mode: llap
            LLAP IO: no inputs
@@ -97,19 +97,19 @@ STAGE PLANS:
          Map Operator Tree:
              TableScan
                alias: b
-                Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                Filter Operator
                  predicate: key is not null (type: boolean)
-                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: key (type: int)
                    outputColumnNames: _col0
-                    Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                    Reduce Output Operator
                      key expressions: _col0 (type: int)
                      sort order: +
                      Map-reduce partition columns: _col0 (type: int)
-                      Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
        Reducer 2
@@ -119,10 +119,10 @@ STAGE PLANS:
                aggregations: count(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -180,14 +180,14 @@ STAGE PLANS:
          Map Operator Tree:
              TableScan
                alias: a
-                Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                Filter Operator
                  predicate: key is not null (type: boolean)
-                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: key (type: int)
                    outputColumnNames: _col0
-                    Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                    Map Join Operator
                      condition map:
                           Inner Join 0 to 1
@@ -197,18 +197,18 @@ STAGE PLANS:
                      outputColumnNames: _col0
                      input vertices:
                        1 Map 3
-                      Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
                      Group By Operator
                        aggregations: count()
                        keys: _col0 (type: int)
                        mode: hash
                        outputColumnNames: _col0, _col1
-                        Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
                        Reduce Output Operator
                          key expressions: _col0 (type: int)
                          sort order: +
                          Map-reduce partition columns: _col0 (type: int)
-                          Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
                          value expressions: _col1 (type: bigint)
            Execution mode: llap
            LLAP IO: no inputs
@@ -216,19 +216,19 @@ STAGE PLANS:
          Map Operator Tree:
              TableScan
                alias: b
-                Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                Filter Operator
                  predicate: key is not null (type: boolean)
-                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: key (type: int)
                    outputColumnNames: _col0
-                    Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                    Reduce Output Operator
                      key expressions: _col0 (type: int)
                      sort order: +
Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -239,10 +239,10 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -316,14 +316,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -333,36 +333,36 @@ STAGE PLANS: outputColumnNames: _col0 input vertices: 1 Map 4 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -372,17 +372,17 @@ STAGE PLANS: keys: KEY._col0 (type: int) 
mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: llap @@ -391,10 +391,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -483,14 +483,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -500,24 +500,24 @@ STAGE PLANS: outputColumnNames: _col0 input vertices: 1 Map 4 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -525,19 +525,19 @@ STAGE PLANS: Map Operator Tree: 
TableScan alias: b - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -548,7 +548,7 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -558,14 +558,14 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col3 input vertices: 1 Reducer 3 - Statistics: Num rows: 5 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col3 (type: bigint), _col1 (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -578,12 +578,12 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Stage: Stage-0 @@ -667,14 +667,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE 
Map Join Operator condition map: Inner Join 0 to 1 @@ -683,15 +683,15 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -699,19 +699,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -721,10 +721,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -796,14 +796,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -812,15 +812,15 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - 
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -828,19 +828,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -850,10 +850,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -949,14 +949,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -965,15 +965,15 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: 
_col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -981,19 +981,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1003,10 +1003,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1090,14 +1090,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 8) (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -1106,15 +1106,15 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -1122,19 +1122,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 8) (type: boolean) - Statistics: Num 
rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1144,10 +1144,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1209,14 +1209,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (key + 1) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -1225,15 +1225,15 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -1241,19 +1241,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (key + 1) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 
Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1263,10 +1263,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1324,14 +1324,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -1340,15 +1340,15 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -1356,19 +1356,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs 
Reducer 2 @@ -1378,10 +1378,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1437,14 +1437,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -1453,15 +1453,15 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -1469,19 +1469,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1491,10 +1491,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic 
stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1560,14 +1560,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -1579,15 +1579,15 @@ STAGE PLANS: input vertices: 1 Map 3 2 Map 4 - Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -1595,38 +1595,38 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce 
partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1636,10 +1636,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1719,14 +1719,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -1735,15 +1735,15 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -1751,19 +1751,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1773,10 +1773,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - 
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1846,14 +1846,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -1862,15 +1862,15 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -1878,19 +1878,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1900,10 +1900,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: 
COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1961,14 +1961,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -1978,18 +1978,18 @@ STAGE PLANS: outputColumnNames: _col0 input vertices: 1 Map 3 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -1997,19 +1997,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -2020,10 +2020,10 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2097,14 +2097,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: 
a - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -2114,36 +2114,36 @@ STAGE PLANS: outputColumnNames: _col0 input vertices: 1 Map 4 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -2153,17 +2153,17 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: llap @@ -2172,10 +2172,10 @@ STAGE PLANS: aggregations: 
count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2264,14 +2264,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -2281,24 +2281,24 @@ STAGE PLANS: outputColumnNames: _col0 input vertices: 1 Map 4 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -2306,19 +2306,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE 
Execution mode: llap
LLAP IO: no inputs
Reducer 2
@@ -2329,7 +2329,7 @@ STAGE PLANS:
keys: KEY._col0 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -2339,14 +2339,14 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col3
input vertices:
1 Reducer 3
- Statistics: Num rows: 5 Data size: 22 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: int), _col3 (type: bigint), _col1 (type: bigint)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 5 Data size: 22 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 5 Data size: 22 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -2359,12 +2359,12 @@ STAGE PLANS:
keys: KEY._col0 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: bigint)
Stage: Stage-0
@@ -2448,14 +2448,14 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: a
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (key < 6) (type: boolean)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -2464,15 +2464,15 @@ STAGE PLANS:
1 _col0 (type: int)
input vertices:
1 Map 3
- Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: bigint)
Execution mode: llap
LLAP IO: no inputs
@@ -2480,19 +2480,19 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: a
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (key < 6) (type: boolean)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
Reducer 2
@@ -2502,10 +2502,10 @@ STAGE PLANS:
aggregations: count(VALUE._col0)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -2577,14 +2577,14 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: a
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (key < 6) (type: boolean)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -2593,15 +2593,15 @@ STAGE PLANS:
1 _col0 (type: int)
input vertices:
1 Map 3
- Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: bigint)
Execution mode: llap
LLAP IO: no inputs
@@ -2609,19 +2609,19 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: b
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (key < 6) (type: boolean)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
Reducer 2
@@ -2631,10 +2631,10 @@ STAGE PLANS:
aggregations: count(VALUE._col0)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -2730,14 +2730,14 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: a
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (key < 6) (type: boolean)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -2746,15 +2746,15 @@ STAGE PLANS:
1 _col0 (type: int)
input vertices:
1 Map 3
- Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: bigint)
Execution mode: llap
LLAP IO: no inputs
@@ -2762,19 +2762,19 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: a
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (key < 6) (type: boolean)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
Reducer 2
@@ -2784,10 +2784,10 @@ STAGE PLANS:
aggregations: count(VALUE._col0)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -2871,14 +2871,14 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: a
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (key < 8) (type: boolean)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -2887,15 +2887,15 @@ STAGE PLANS:
1 _col0 (type: int)
input vertices:
1 Map 3
- Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: bigint)
Execution mode: llap
LLAP IO: no inputs
@@ -2903,19 +2903,19 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: a
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (key < 8) (type: boolean)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
Reducer 2
@@ -2925,10 +2925,10 @@ STAGE PLANS:
aggregations: count(VALUE._col0)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -2986,14 +2986,14 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: a
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (key < 6) (type: boolean)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -3002,15 +3002,15 @@ STAGE PLANS:
1 _col0 (type: int)
input vertices:
1 Map 3
- Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: bigint)
Execution mode: llap
LLAP IO: no inputs
@@ -3018,19 +3018,19 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: a
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (key < 6) (type: boolean)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
Reducer 2
@@ -3040,10 +3040,10 @@ STAGE PLANS:
aggregations: count(VALUE._col0)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -3099,14 +3099,14 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: a
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (key < 6) (type: boolean)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -3115,15 +3115,15 @@ STAGE PLANS:
1 _col0 (type: int)
input vertices:
1 Map 3
- Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: bigint)
Execution mode: llap
LLAP IO: no inputs
@@ -3131,19 +3131,19 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: a
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (key < 6) (type: boolean)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
Reducer 2
@@ -3153,10 +3153,10 @@ STAGE PLANS:
aggregations: count(VALUE._col0)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -3222,14 +3222,14 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: a
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (key < 6) (type: boolean)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -3241,15 +3241,15 @@ STAGE PLANS:
input vertices:
1 Map 3
2 Map 4
- Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: bigint)
Execution mode: llap
LLAP IO: no inputs
@@ -3257,38 +3257,38 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: a
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (key < 6) (type: boolean)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
Map 4
Map Operator Tree:
TableScan
alias: a
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (key < 6) (type: boolean)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
Reducer 2
@@ -3298,10 +3298,10 @@ STAGE PLANS:
aggregations: count(VALUE._col0)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -3381,14 +3381,14 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: a
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (key < 6) (type: boolean)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -3397,15 +3397,15 @@ STAGE PLANS:
1 _col0 (type: int)
input vertices:
1 Map 3
- Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: bigint)
Execution mode: llap
LLAP IO: no inputs
@@ -3413,19 +3413,19 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: b
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (key < 6) (type: boolean)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
Reducer 2
@@ -3435,10 +3435,10 @@ STAGE PLANS:
aggregations: count(VALUE._col0)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
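The golden-file changes above all follow from column statistics now being gathered by default: once per-column NDV/min/max values exist, the planner reports Column stats: COMPLETE and estimates the filter (key < 6) from the column range instead of passing through the full 10-row scan count, which is why the filtered estimate drops to 3 rows and the map-join estimate from 11 to 4. A minimal sketch of how the before/after estimates can be compared in a Hive session (the t_small table and its data are illustrative, not part of this patch):

-- Sketch: observe the effect of column stats on filter/join estimates.
CREATE TABLE t_small (key int);
INSERT INTO t_small VALUES (1),(2),(3),(4),(5),(6),(7),(8),(9),(10);
SET hive.stats.column.autogather=false;
EXPLAIN SELECT count(*) FROM t_small a JOIN t_small b ON (a.key = b.key) WHERE a.key < 6;
-- expected: Column stats: NONE; the filter estimate stays at the scan row count
SET hive.stats.column.autogather=true;
ANALYZE TABLE t_small COMPUTE STATISTICS FOR COLUMNS;
EXPLAIN SELECT count(*) FROM t_small a JOIN t_small b ON (a.key = b.key) WHERE a.key < 6;
-- expected: Column stats: COMPLETE and a selectivity-based estimate (10 -> 3 here)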
diff --git ql/src/test/results/clientpositive/llap/bucket2.q.out ql/src/test/results/clientpositive/llap/bucket2.q.out
index 9954c2dd99..10fc171ae1 100644
--- ql/src/test/results/clientpositive/llap/bucket2.q.out
+++ ql/src/test/results/clientpositive/llap/bucket2.q.out
@@ -140,6 +140,41 @@ STAGE PLANS:
TotalFiles: 2
GatherStats: true
MultiFileSpray: true
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+ mode: complete
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: struct), _col1 (type: struct)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1
+ columns.types struct:struct
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
Stage: Stage-2
Dependency Collection
@@ -177,6 +212,11 @@ STAGE PLANS:
Stats Work
Basic Stats Work:
#### A masked pattern was here ####
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.bucket2_1
+ Is Table Level Stats: true
PREHOOK: query: insert overwrite table bucket2_1 select * from src
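In bucket2.q.out the new operator branch is the autogathering itself: the rows written by the INSERT are also fed through compute_stats(col, 'hll') (an HLL-based NDV estimator) in mode: complete, and the Stats Work stage persists the result via the new Column Stats Desc, so column stats land in the metastore as part of the same query. A short sketch of how this can be verified after the test's insert (syntax as in recent Hive releases; treat it as illustrative):

INSERT OVERWRITE TABLE bucket2_1 SELECT * FROM src;
-- No explicit ANALYZE needed; the stats were written by the insert itself:
DESCRIBE FORMATTED bucket2_1 key;
-- expected: min/max/num_nulls/distinct_count populated for column key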
diff --git ql/src/test/results/clientpositive/llap/bucket3.q.out ql/src/test/results/clientpositive/llap/bucket3.q.out
index 218f9b7e48..9ae5166ded 100644
--- ql/src/test/results/clientpositive/llap/bucket3.q.out
+++ ql/src/test/results/clientpositive/llap/bucket3.q.out
@@ -26,6 +26,7 @@ STAGE PLANS:
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -138,6 +139,61 @@ STAGE PLANS:
TotalFiles: 2
GatherStats: true
MultiFileSpray: true
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), '1' (type: string)
+ outputColumnNames: key, value, ds
+ Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+ keys: ds (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: a
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: -1
+ value expressions: _col1 (type: struct), _col2 (type: struct)
+ auto parallelism: true
+ Reducer 3
+ Execution mode: llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2
+ columns.types struct:struct:string
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
Stage: Stage-2
Dependency Collection
@@ -174,6 +230,11 @@ STAGE PLANS:
Stats Work
Basic Stats Work:
#### A masked pattern was here ####
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.bucket3_1
+ Is Table Level Stats: false
PREHOOK: query: insert overwrite table bucket3_1 partition (ds='1') select * from src
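bucket3.q.out shows the partitioned flavor of the same branch: the static partition value '1' is carried along as ds, compute_stats runs keyed on it (hence the extra Reducer 3 and the struct-plus-key output columns), and Is Table Level Stats: false records that the stats are stored per partition rather than on the table. A sketch of inspecting the per-partition stats afterwards (the DESCRIBE syntax for partition-level column stats varies somewhat across Hive versions, so take this as an assumption):

INSERT OVERWRITE TABLE bucket3_1 PARTITION (ds='1') SELECT * FROM src;
DESCRIBE FORMATTED bucket3_1 PARTITION (ds='1') key;
-- expected: column stats scoped to partition ds=1, not the whole table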
diff --git ql/src/test/results/clientpositive/llap/bucket4.q.out ql/src/test/results/clientpositive/llap/bucket4.q.out
index 2115565096..0a1c49797b 100644
--- ql/src/test/results/clientpositive/llap/bucket4.q.out
+++ ql/src/test/results/clientpositive/llap/bucket4.q.out
@@ -142,6 +142,41 @@ STAGE PLANS:
TotalFiles: 2
GatherStats: true
MultiFileSpray: true
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+ mode: complete
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: struct), _col1 (type: struct)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1
+ columns.types struct:struct
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
Stage: Stage-2
Dependency Collection
@@ -180,6 +215,11 @@ STAGE PLANS:
Stats Work
Basic Stats Work:
#### A masked pattern was here ####
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.bucket4_1
+ Is Table Level Stats: true
PREHOOK: query: insert overwrite table bucket4_1 select * from src
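bucket4.q.out follows the bucket2 pattern, including mode: complete for the stats aggregation since the write already ends in a single task. For tests (or users) that want the old plans back, the new default can simply be switched off; this opt-out sketch is illustrative rather than anything the patch itself changes:

SET hive.stats.column.autogather=false;
EXPLAIN INSERT OVERWRITE TABLE bucket4_1 SELECT * FROM src;
-- expected: no compute_stats branch and no Column Stats Desc under Stats Work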
diff --git ql/src/test/results/clientpositive/llap/bucket5.q.out ql/src/test/results/clientpositive/llap/bucket5.q.out
index 680dbd95d2..a679fc9175 100644
--- ql/src/test/results/clientpositive/llap/bucket5.q.out
+++ ql/src/test/results/clientpositive/llap/bucket5.q.out
@@ -43,7 +43,9 @@ STAGE PLANS:
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
- Reducer 3 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+ Reducer 4 <- Map 1 (SIMPLE_EDGE)
+ Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -173,10 +175,57 @@ STAGE PLANS:
TotalFiles: 1
GatherStats: true
MultiFileSpray: false
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: -1
+ value expressions: _col0 (type: struct), _col1 (type: struct)
+ auto parallelism: false
Reducer 3
Execution mode: llap
Needs Tagging: false
Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1
+ columns.types struct:struct
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Reducer 4
+ Execution mode: llap
+ Needs Tagging: false
+ Reduce Operator Tree:
Select Operator
expressions: UDFToInteger(KEY.reducesinkkey0) (type: int), VALUE._col0 (type: string)
outputColumnNames: _col0, _col1
@@ -213,6 +262,53 @@ STAGE PLANS:
TotalFiles: 1
GatherStats: true
MultiFileSpray: false
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: -1
+ value expressions: _col0 (type: struct), _col1 (type: struct)
+ auto parallelism: false
+ Reducer 5
+ Execution mode: llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1
+ columns.types struct:struct
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
Stage: Stage-3
Dependency Collection
@@ -251,6 +347,11 @@ STAGE PLANS:
Stats Work
Basic Stats Work:
#### A masked pattern was here ####
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.bucketed_table
+ Is Table Level Stats: true
Stage: Stage-1
Move Operator
@@ -284,6 +385,11 @@ STAGE PLANS:
Stats Work
Basic Stats Work:
#### A masked pattern was here ####
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.unbucketed_table
+ Is Table Level Stats: true
Stage: Stage-10
Conditional Operator
@@ -515,7 +621,7 @@ Retention: 0
#### A masked pattern was here ####
Table Type: MANAGED_TABLE
Table Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
SORTBUCKETCOLSPREFIX TRUE
numFiles 2
numRows 500
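bucket5.q.out is the multi-insert case: each target (bucketed_table and unbucketed_table) gets its own compute_stats pipeline, hence the two new reducers (Reducer 3 and Reducer 5) and the two Column Stats Desc entries. The table-parameter change at the bottom is the visible end result and can be checked directly; a sketch:

SHOW TBLPROPERTIES bucketed_table;
-- expected, per the diff above:
-- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}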
diff --git ql/src/test/results/clientpositive/llap/bucket6.q.out ql/src/test/results/clientpositive/llap/bucket6.q.out
index 42f062b0f6..78c044d529 100644
--- ql/src/test/results/clientpositive/llap/bucket6.q.out
+++ ql/src/test/results/clientpositive/llap/bucket6.q.out
@@ -24,6 +24,7 @@ STAGE PLANS:
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -58,6 +59,34 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.src_bucket
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: struct), _col1 (type: struct)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-2
Dependency Collection
@@ -75,6 +104,10 @@ STAGE PLANS:
Stage: Stage-3
Stats Work
Basic Stats Work:
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: string, string
+ Table: default.src_bucket
PREHOOK: query: insert into table src_bucket select key,value from srcpart
PREHOOK: type: QUERY
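The bucket_groupby.q.out changes that follow are read-side only: no new operators, just tighter estimates because the scans now see real column stats (for example, an NDV-based 250-row group-by estimate instead of a size-derived guess, and PARTIAL where the stats cover only part of the input). Running the explicit stats command by hand is roughly what autogather now does implicitly on write; a sketch:

ANALYZE TABLE clustergroupby COMPUTE STATISTICS FOR COLUMNS;
EXPLAIN SELECT key, count(1) FROM clustergroupby GROUP BY key LIMIT 10;
-- expected: Column stats: COMPLETE and a 250-row estimate for the hash group-by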
diff --git ql/src/test/results/clientpositive/llap/bucket_groupby.q.out ql/src/test/results/clientpositive/llap/bucket_groupby.q.out
index 98c1e48805..e2b60025b5 100644
--- ql/src/test/results/clientpositive/llap/bucket_groupby.q.out
+++ ql/src/test/results/clientpositive/llap/bucket_groupby.q.out
@@ -62,22 +62,22 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: clustergroupby
- Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: string)
outputColumnNames: key
- Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
keys: key (type: string)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.1
value expressions: _col1 (type: bigint)
Execution mode: llap
@@ -90,11 +90,11 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 46448 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
- Statistics: Num rows: 250 Data size: 46448 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.1
value expressions: _col1 (type: bigint)
Reducer 3
@@ -103,13 +103,13 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 46448 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 10
- Statistics: Num rows: 10 Data size: 1850 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 10 Data size: 1850 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -189,22 +189,22 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: clustergroupby
- Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: string)
outputColumnNames: key
- Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
keys: key (type: string)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.1
value expressions: _col1 (type: bigint)
Execution mode: llap
@@ -217,11 +217,11 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 46448 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
- Statistics: Num rows: 250 Data size: 46448 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.1
value expressions: _col1 (type: bigint)
Reducer 3
@@ -230,13 +230,13 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 46448 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 10
- Statistics: Num rows: 10 Data size: 1850 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 10 Data size: 1850 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -290,22 +290,22 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: clustergroupby
- Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: length(key) (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
keys: _col0 (type: int)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 3000 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 3000 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.1
value expressions: _col1 (type: bigint)
Execution mode: llap
@@ -318,13 +318,13 @@ STAGE PLANS:
keys: KEY._col0 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 46448 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 3000 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 10
- Statistics: Num rows: 10 Data size: 1850 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 10 Data size: 1850 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -371,22 +371,22 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: clustergroupby
- Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: abs(length(key)) (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
keys: _col0 (type: int)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 3000 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 3000 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.1
value expressions: _col1 (type: bigint)
Execution mode: llap
@@ -399,13 +399,13 @@ STAGE PLANS:
keys: KEY._col0 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 46448 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 3000 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 10
- Statistics: Num rows: 10 Data size: 1850 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 10 Data size: 1850 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -453,22 +453,22 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: clustergroupby
- Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: string)
outputColumnNames: key
- Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
keys: key (type: string)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.1
value expressions: _col1 (type: bigint)
Execution mode: llap
@@ -481,11 +481,11 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 46448 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
- Statistics: Num rows: 250 Data size: 46448 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.1
value expressions: _col1 (type: bigint)
Reducer 3
@@ -494,13 +494,13 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 46448 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 10
- Statistics: Num rows: 10 Data size: 1850 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 10 Data size: 1850 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -555,22 +555,22 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: clustergroupby
- Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: value (type: string)
outputColumnNames: value
- Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
keys: value (type: string)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.1
value expressions: _col1 (type: bigint)
Execution mode: llap
@@ -583,11 +583,11 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 46448 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
- Statistics: Num rows: 250 Data size: 46448 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.1
value expressions: _col1 (type: bigint)
Reducer 3
@@ -596,13 +596,13 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 46448 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 10
- Statistics: Num rows: 10 Data size: 1850 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 990 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 10 Data size: 1850 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 990 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -656,22 +656,22 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: clustergroupby
- Statistics: Num rows: 1000 Data size: 185608 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
expressions: key (type: string)
outputColumnNames: key
- Statistics: Num rows: 1000 Data size: 185608 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL
Group By Operator
aggregations: count()
keys: key (type: string)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1000 Data size: 185608 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: PARTIAL
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1000 Data size: 185608 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: PARTIAL
value expressions: _col1 (type: bigint)
Execution mode: llap
LLAP IO: no inputs
@@ -683,10 +683,10 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 92804 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: PARTIAL
File Output Operator
compressed: false
- Statistics: Num rows: 500 Data size: 92804 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: PARTIAL
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1041,22 +1041,22 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: clustergroupby
- Statistics: Num rows: 1000 Data size: 185608 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
expressions: key (type: string)
outputColumnNames: key
- Statistics: Num rows: 1000 Data size: 185608 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL
Group By Operator
aggregations: count()
keys: key (type: string)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1000 Data size: 185608 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: PARTIAL
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1000 Data size: 185608 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: PARTIAL
value expressions: _col1 (type: bigint)
Execution mode: llap
LLAP IO: no inputs
@@ -1068,10 +1068,10 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 92804 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: PARTIAL
File Output Operator
compressed: false
- Statistics: Num rows: 500 Data size: 92804 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: PARTIAL
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1155,23 +1155,23 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: clustergroupby
- Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: string)
outputColumnNames: key
- Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
bucketGroup: true
keys: key (type: string)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.1
value expressions: _col1 (type: bigint)
Execution mode: llap
@@ -1184,11 +1184,11 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 46448 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
- Statistics: Num rows: 250 Data size: 46448 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.1
value expressions: _col1 (type: bigint)
Reducer 3
@@ -1197,13 +1197,13 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 46448 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 10
- Statistics: Num rows: 10 Data size: 1850 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 10 Data size: 1850 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1258,22 +1258,22 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: clustergroupby
- Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: value (type: string)
outputColumnNames: value
- Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
keys: value (type: string)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.1
value expressions: _col1 (type: bigint)
Execution mode: llap
@@ -1286,11 +1286,11 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 46448 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
- Statistics: Num rows: 250 Data size: 46448 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.1
value expressions: _col1 (type: bigint)
Reducer 3
@@ -1299,13 +1299,13 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 46448 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 10
- Statistics: Num rows: 10 Data size: 1850 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 990 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 10 Data size: 1850 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 990 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1460,23 +1460,23 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: clustergroupby
- Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: string)
outputColumnNames: key
- Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
bucketGroup: true
keys: key (type: string)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.1
value expressions: _col1 (type: bigint)
Execution mode: llap
@@ -1489,11 +1489,11 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 46448 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
- Statistics: Num rows: 250 Data size: 46448 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.1
value expressions: _col1 (type: bigint)
Reducer 3
@@ -1502,13 +1502,13 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 46448 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 10
- Statistics: Num rows: 10 Data size: 1850 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 10 Data size: 1850 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1563,22 +1563,22 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: clustergroupby
- Statistics: Num rows: 500 Data size: 180480 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: key, value
- Statistics: Num rows: 500 Data size: 180480 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
keys: key (type: string), value (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 500 Data size: 180480 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output
Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 500 Data size: 180480 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: bigint) Execution mode: llap @@ -1591,15 +1591,15 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 90240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 90240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 250 Data size: 90240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) Reducer 3 @@ -1608,13 +1608,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 90240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 3600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 3600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/bucket_many.q.out ql/src/test/results/clientpositive/llap/bucket_many.q.out index 4f3bee20ce..22027457ed 100644 --- ql/src/test/results/clientpositive/llap/bucket_many.q.out +++ ql/src/test/results/clientpositive/llap/bucket_many.q.out @@ -26,6 +26,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -140,6 +141,53 @@ STAGE PLANS: TotalFiles: 256 GatherStats: true MultiFileSpray: true + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Reducer 3 + Execution mode: llap + Needs Tagging: false 
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1
+ columns.types struct:struct
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
Stage: Stage-2
Dependency Collection
@@ -177,6 +225,11 @@ STAGE PLANS:
Stats Work
Basic Stats Work:
#### A masked pattern was here ####
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.bucket_many
+ Is Table Level Stats: true
PREHOOK: query: insert overwrite table bucket_many
select * from src
diff --git ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out
index 20ef495a5a..11eee1abe6 100644
--- ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out
+++ ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out
@@ -4132,19 +4132,19 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: a
- Statistics: Num rows: 242 Data size: 43428 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 230 Data size: 41274 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 230 Data size: 41274 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 230 Data size: 41274 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: string)
Execution mode: llap
LLAP IO: no inputs
@@ -4178,14 +4178,14 @@ STAGE PLANS:
0 _col0 (type: int)
1 _col0 (type: int)
outputColumnNames: _col0, _col1, _col3
- Statistics: Num rows: 550 Data size: 52250 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 391 Data size: 72726 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 550 Data size: 52250 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 391 Data size: 72726 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 550 Data size: 52250 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 391 Data size: 72726 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -4221,19 +4221,19 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: a
- Statistics: Num rows: 242 Data size: 43428 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 230 Data size: 41274 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 230 Data size: 41274 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 230 Data size: 41274 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: string)
Execution mode: llap
LLAP IO: no inputs
@@ -4258,14 +4258,14 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col3
input vertices:
0 Map 1
- Statistics: Num rows: 550 Data size: 52250 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 391 Data size: 72726 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 550 Data size: 52250 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 391 Data size: 72726 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 550 Data size: 52250 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 391 Data size: 72726 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/llap/bucketmapjoin1.q.out ql/src/test/results/clientpositive/llap/bucketmapjoin1.q.out
index fa6a2d0d3f..480775fbc0 100644
--- ql/src/test/results/clientpositive/llap/bucketmapjoin1.q.out
+++ ql/src/test/results/clientpositive/llap/bucketmapjoin1.q.out
@@ -399,7 +399,8 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -478,7 +479,7 @@ STAGE PLANS:
name: default.srcbucket_mapjoin
Truncated Path -> Alias:
/srcbucket_mapjoin [a]
- Map 3
+ Map 4
Map Operator Tree:
TableScan
alias: b
@@ -605,6 +606,53 @@ STAGE PLANS:
TotalFiles: 1
GatherStats: true
MultiFileSpray: false
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ outputColumnNames: key, value1, value2
+ Statistics: Num rows: 58 Data size: 16921 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll')
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+ auto parallelism: false
+ Reducer 3
+ Execution mode: llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2
+ columns.types struct:struct:struct
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
Stage: Stage-2
Dependency Collection
@@ -641,6 +689,11 @@ STAGE PLANS:
Stats Work
Basic Stats Work:
#### A masked pattern was here ####
+ Column Stats Desc:
+ Columns: key, value1, value2
+ Column Types: string, string, string
+ Table: default.bucketmapjoin_tmp_result
+ Is Table Level Stats: true
PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result
select /*+mapjoin(b)*/ a.key, a.value, b.value
@@ -766,7 +819,8 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -845,7 +899,7 @@ STAGE PLANS:
name: default.srcbucket_mapjoin
Truncated Path -> Alias:
/srcbucket_mapjoin [a]
- Map 3
+ Map 4
Map Operator Tree:
TableScan
alias: b
@@ -951,7 +1005,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
bucket_count -1
column.name.delimiter ,
columns key,value1,value2
@@ -972,6 +1026,53 @@ STAGE PLANS:
TotalFiles: 1
GatherStats: true
MultiFileSpray: false
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ outputColumnNames: key, value1, value2
+ Statistics: Num rows: 58 Data size: 16921 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll')
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+ auto parallelism: false
+ Reducer 3
+ Execution mode: llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2
+ columns.types struct:struct:struct
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
Stage: Stage-2
Dependency Collection
@@ -985,7 +1086,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
bucket_count -1
column.name.delimiter ,
columns key,value1,value2
@@ -1008,6 +1109,11 @@ STAGE PLANS:
Stats Work
Basic Stats Work:
#### A masked pattern was here ####
+ Column Stats Desc:
+ Columns: key, value1, value2
+ Column Types: string, string, string
+ Table: default.bucketmapjoin_tmp_result
+ Is Table Level Stats: true
PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result
select /*+mapjoin(a)*/ a.key, a.value, b.value
diff --git ql/src/test/results/clientpositive/llap/bucketmapjoin2.q.out ql/src/test/results/clientpositive/llap/bucketmapjoin2.q.out
index 57eead0ed5..14f1efe792 100644
--- ql/src/test/results/clientpositive/llap/bucketmapjoin2.q.out
+++ ql/src/test/results/clientpositive/llap/bucketmapjoin2.q.out
@@ -111,7 +111,8 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -192,7 +193,7 @@ STAGE PLANS:
name: default.srcbucket_mapjoin_part
Truncated Path -> Alias:
/srcbucket_mapjoin_part/ds=2008-04-08 [a]
- Map 3
+ Map 4
Map Operator Tree:
TableScan
alias: b
@@ -319,6 +320,53 @@ STAGE PLANS:
TotalFiles: 1
GatherStats: true
MultiFileSpray: false
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ outputColumnNames: key, value1, value2
+ Statistics: Num rows: 58 Data size: 16921 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll')
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+ auto parallelism: false
+ Reducer 3
+ Execution mode: llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2
+ columns.types struct:struct:struct
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
Stage: Stage-2
Dependency Collection
@@ -355,6 +403,11 @@ STAGE PLANS:
Stats Work
Basic Stats Work:
#### A masked pattern was here ####
+ Column Stats Desc:
+ Columns: key, value1, value2
+ Column Types: string, string, string
+ Table: default.bucketmapjoin_tmp_result
+ Is Table Level Stats: true
PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result
select /*+mapjoin(b)*/ a.key, a.value, b.value
@@ -484,7 +537,8 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -565,7 +619,7 @@ STAGE PLANS:
name: default.srcbucket_mapjoin_part
Truncated Path -> Alias:
/srcbucket_mapjoin_part/ds=2008-04-08 [a]
- Map 3
+ Map 4
Map Operator Tree:
TableScan
alias: b
@@ -671,7 +725,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
bucket_count -1
column.name.delimiter ,
columns key,value1,value2
@@ -692,6 +746,53 @@ STAGE PLANS:
TotalFiles: 1
GatherStats: true
MultiFileSpray: false
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ outputColumnNames: key, value1, value2
+ Statistics: Num rows: 58 Data size: 16921 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll')
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+ auto parallelism: false
+ Reducer 3
+ Execution mode: llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2
+ columns.types struct:struct:struct
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
Stage: Stage-2
Dependency Collection
@@ -705,7 +806,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
bucket_count -1
column.name.delimiter ,
columns key,value1,value2
@@ -728,6 +829,11 @@ STAGE PLANS:
Stats Work
Basic Stats Work:
#### A masked pattern was here ####
+ Column Stats Desc:
+ Columns: key, value1, value2
+ Column Types: string, string, string
+ Table: default.bucketmapjoin_tmp_result
+ Is Table Level Stats: true
PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result
select /*+mapjoin(a)*/ a.key, a.value, b.value
@@ -874,7 +980,8 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -955,7 +1062,7 @@ STAGE PLANS:
name: default.srcbucket_mapjoin_part
Truncated Path -> Alias:
/srcbucket_mapjoin_part/ds=2008-04-08 [a]
- Map 3
+ Map 4
Map Operator Tree:
TableScan
alias: b
@@ -1110,7 +1217,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
bucket_count -1
column.name.delimiter ,
columns key,value1,value2
@@ -1131,6 +1238,53 @@ STAGE PLANS:
TotalFiles: 1
GatherStats: true
MultiFileSpray: false
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ outputColumnNames: key, value1, value2
+ Statistics: Num rows: 61 Data size: 17884 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll')
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+ auto parallelism: false
+ Reducer 3
+ Execution mode: llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2
+ columns.types struct:struct:struct
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
Stage: Stage-2
Dependency Collection
@@ -1144,7 +1298,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
bucket_count -1
column.name.delimiter ,
columns key,value1,value2
@@ -1167,6 +1321,11 @@ STAGE PLANS:
Stats Work
Basic Stats Work:
#### A masked pattern was here ####
+ Column Stats Desc:
+ Columns: key, value1, value2
+ Column Types: string, string, string
+ Table: default.bucketmapjoin_tmp_result
+ Is Table Level Stats: true
PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result
select /*+mapjoin(b)*/ a.key, a.value, b.value
diff --git ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out
index 184e8905f8..161631ac23 100644
--- ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out
+++ ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out
@@ -135,7 +135,8 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -216,7 +217,7 @@ STAGE PLANS:
name: default.srcbucket_mapjoin_part_2
Truncated Path -> Alias:
/srcbucket_mapjoin_part_2/ds=2008-04-08 [a]
- Map 3
+ Map 4
Map Operator Tree:
TableScan
alias: b
@@ -343,6 +344,53 @@ STAGE PLANS:
TotalFiles: 1
GatherStats: true
MultiFileSpray: false
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ outputColumnNames: key, value1, value2
+ Statistics: Num rows: 58 Data size: 16921 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll')
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+ auto parallelism: false
+ Reducer 3
+ Execution mode: llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2
+ columns.types struct:struct:struct
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
Stage: Stage-2
Dependency Collection
@@ -379,6 +427,11 @@ STAGE PLANS:
Stats Work
Basic Stats Work:
#### A masked pattern was here ####
+ Column Stats Desc:
+ Columns: key, value1, value2
+ Column Types: string, string, string
+ Table: default.bucketmapjoin_tmp_result
+ Is Table Level Stats: true
PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result
select /*+mapjoin(b)*/ a.key, a.value, b.value
@@ -508,7 +561,8 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -589,7 +643,7 @@ STAGE PLANS:
name: default.srcbucket_mapjoin_part_2
Truncated Path -> Alias:
/srcbucket_mapjoin_part_2/ds=2008-04-08 [a]
- Map 3
+ Map 4
Map Operator Tree:
TableScan
alias: b
@@ -695,7 +749,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
bucket_count -1
column.name.delimiter ,
columns key,value1,value2
@@ -716,6 +770,53 @@ STAGE PLANS:
TotalFiles: 1
GatherStats: true
MultiFileSpray: false
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ outputColumnNames: key, value1, value2
+ Statistics: Num rows: 58 Data size: 16921 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll')
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+ auto parallelism: false
+ Reducer 3
+ Execution mode: llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2
+ columns.types struct:struct:struct
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
Stage: Stage-2
Dependency Collection
@@ -729,7 +830,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
bucket_count -1
column.name.delimiter ,
columns key,value1,value2
@@ -752,6 +853,11 @@ STAGE PLANS:
Stats Work
Basic Stats Work:
#### A masked pattern was here ####
+ Column Stats Desc:
+ Columns: key, value1, value2
+ Column Types: string, string, string
+ Table: default.bucketmapjoin_tmp_result
+ Is Table Level Stats: true
PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result
select /*+mapjoin(a)*/ a.key, a.value, b.value
diff --git ql/src/test/results/clientpositive/llap/bucketmapjoin4.q.out ql/src/test/results/clientpositive/llap/bucketmapjoin4.q.out
index b3530734ce..c07f722f5b 100644
--- ql/src/test/results/clientpositive/llap/bucketmapjoin4.q.out
+++ ql/src/test/results/clientpositive/llap/bucketmapjoin4.q.out
@@ -135,7 +135,8 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -214,7 +215,7 @@ STAGE PLANS:
name: default.srcbucket_mapjoin
Truncated Path -> Alias:
/srcbucket_mapjoin [a]
- Map 3
+ Map 4
Map Operator Tree:
TableScan
alias: b
@@ -339,6 +340,53 @@ STAGE PLANS:
TotalFiles: 1
GatherStats: true
MultiFileSpray: false
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ outputColumnNames: key, value1, value2
+ Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll')
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+ auto parallelism: false
+ Reducer 3
+ Execution mode: llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -375,6 +423,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(b)*/ a.key, a.value, b.value @@ -492,7 +545,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -571,7 +625,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin Truncated Path -> Alias: /srcbucket_mapjoin [a] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -675,7 +729,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -696,6 +750,53 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -709,7 +810,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -732,6 +833,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(a)*/ a.key, a.value, b.value diff --git ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_2.q.out ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_2.q.out index b907c2dbd8..ce248323ce 100644 --- ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_2.q.out +++ ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_2.q.out @@ -101,25 +101,26 @@ STAGE PLANS: Edges: Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1800 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1800 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -127,14 +128,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 84 Data size: 15964 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 80 Data size: 15203 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 80 Data size: 15203 Basic stats: COMPLETE Column 
stats: NONE + Statistics: Num rows: 84 Data size: 15036 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -144,16 +145,16 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col4 input vertices: 0 Map 1 - Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 15540 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), concat(_col1, _col4) (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -163,15 +164,51 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 84 Data size: 22932 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -191,6 
+228,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.value, b.value) @@ -281,25 +322,26 @@ STAGE PLANS: Edges: Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 20 Data size: 3900 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 1900 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 19 Data size: 3705 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 1900 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 19 Data size: 3705 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 5580 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 19 Data size: 3705 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 5580 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -307,14 +349,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 84 Data size: 15964 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 80 Data size: 15203 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 80 Data size: 15203 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 15036 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -324,16 +366,16 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col4 input vertices: 0 Map 1 - Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 15540 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: _col0 (type: int), concat(_col1, _col4) (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -343,15 +385,51 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE + Statistics: Num 
rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 84 Data size: 22932 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -371,6 +449,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.value, b.value) @@ -485,25 +567,26 @@ STAGE PLANS: Edges: Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1800 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 1950 Basic 
-                        Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 10 Data size: 1800 Basic stats: COMPLETE Column stats: COMPLETE
                        value expressions: _col1 (type: string)
            Execution mode: llap
            LLAP IO: no inputs
@@ -511,14 +594,14 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: b
-                  Statistics: Num rows: 168 Data size: 31740 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 168 Data size: 15792 Basic stats: COMPLETE Column stats: PARTIAL
                  Filter Operator
                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 160 Data size: 30228 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 168 Data size: 15792 Basic stats: COMPLETE Column stats: PARTIAL
                    Select Operator
                      expressions: key (type: int), value (type: string)
                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 160 Data size: 30228 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 168 Data size: 46704 Basic stats: COMPLETE Column stats: PARTIAL
                    Map Join Operator
                      condition map:
                           Inner Join 0 to 1
@@ -528,16 +611,16 @@ STAGE PLANS:
                      outputColumnNames: _col0, _col1, _col4
                      input vertices:
                        0 Map 1
-                      Statistics: Num rows: 176 Data size: 33250 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 168 Data size: 31080 Basic stats: COMPLETE Column stats: PARTIAL
                      Select Operator
                        expressions: _col0 (type: int), concat(_col1, _col4) (type: string)
                        outputColumnNames: _col0, _col1
-                        Statistics: Num rows: 176 Data size: 33250 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 168 Data size: 31584 Basic stats: COMPLETE Column stats: PARTIAL
                        Reduce Output Operator
                          key expressions: _col0 (type: int)
                          sort order: +
                          Map-reduce partition columns: _col0 (type: int)
-                          Statistics: Num rows: 176 Data size: 33250 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 168 Data size: 31584 Basic stats: COMPLETE Column stats: PARTIAL
                          value expressions: _col1 (type: string)
            Execution mode: llap
            LLAP IO: no inputs
@@ -547,15 +630,51 @@ STAGE PLANS:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string)
                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 176 Data size: 33250 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 168 Data size: 31584 Basic stats: COMPLETE Column stats: PARTIAL
                File Output Operator
                  compressed: false
-                  Statistics: Num rows: 176 Data size: 33250 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 168 Data size: 31584 Basic stats: COMPLETE Column stats: PARTIAL
                  table:
                      input format: org.apache.hadoop.mapred.TextInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                      name: default.test_table3
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: string), '1' (type: string)
+                  outputColumnNames: key, value, ds
+                  Statistics: Num rows: 168 Data size: 45864 Basic stats: COMPLETE Column stats: PARTIAL
+                  Group By Operator
+                    aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                    keys: ds (type: string)
+                    mode: hash
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: PARTIAL
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: string)
+                      Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: PARTIAL
+                      value expressions: _col1 (type: struct), _col2 (type: struct)
+        Reducer 4
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: PARTIAL
+                Select Operator
+                  expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: PARTIAL
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: PARTIAL
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

   Stage: Stage-2
     Dependency Collection
@@ -575,6 +694,10 @@ STAGE PLANS:
   Stage: Stage-3
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.test_table3

PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
SELECT a.key, concat(a.value, b.value)
@@ -695,25 +818,26 @@ STAGE PLANS:
      Edges:
        Map 2 <- Map 1 (BROADCAST_EDGE)
        Reducer 3 <- Map 2 (SIMPLE_EDGE)
+        Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
#### A masked pattern was here ####
      Vertices:
        Map 1
            Map Operator Tree:
                TableScan
                  alias: test_table1
-                  Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int), value (type: string)
                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE
                        value expressions: _col1 (type: string)
            Execution mode: llap
            LLAP IO: no inputs
@@ -721,14 +845,14 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: test_table2
-                  Statistics: Num rows: 84 Data size: 15964 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 80 Data size: 15203 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int), value (type: string)
                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 80 Data size: 15203 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE
                    Map Join Operator
                      condition map:
                           Inner Join 0 to 1
@@ -738,16 +862,16 @@ STAGE PLANS:
                      outputColumnNames: _col0, _col1, _col3
                      input vertices:
                        0 Map 1
-                      Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 84 Data size: 15540 Basic stats: COMPLETE Column stats: COMPLETE
                      Select Operator
                        expressions: _col0 (type: int), concat(_col1, _col3) (type: string)
                        outputColumnNames: _col0, _col1
-                        Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE
                        Reduce Output Operator
                          key expressions: _col0 (type: int)
                          sort order: +
                          Map-reduce partition columns: _col0 (type: int)
-                          Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE
                          value expressions: _col1 (type: string)
            Execution mode: llap
            LLAP IO: no inputs
@@ -757,15 +881,51 @@ STAGE PLANS:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string)
                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
-                  Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.TextInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                      name: default.test_table3
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: string), '1' (type: string)
+                  outputColumnNames: key, value, ds
+                  Statistics: Num rows: 84 Data size: 22932 Basic stats: COMPLETE Column stats: COMPLETE
+                  Group By Operator
+                    aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                    keys: ds (type: string)
+                    mode: hash
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: string)
+                      Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE
+                      value expressions: _col1 (type: struct), _col2 (type: struct)
+        Reducer 4
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

   Stage: Stage-2
     Dependency Collection
@@ -785,6 +945,10 @@ STAGE PLANS:
   Stage: Stage-3
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.test_table3

PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
SELECT a.key, concat(a.value, b.value)
@@ -887,25 +1051,26 @@ STAGE PLANS:
      Edges:
        Map 2 <- Map 1 (BROADCAST_EDGE)
        Reducer 3 <- Map 2 (SIMPLE_EDGE)
+        Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
#### A masked pattern was here ####
      Vertices:
        Map 1
            Map Operator Tree:
                TableScan
                  alias: test_table1
-                  Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int), concat(value, value) (type: string)
                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 10 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 10 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE
                        value expressions: _col1 (type: string)
            Execution mode: llap
            LLAP IO: no inputs
@@ -913,14 +1078,14 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: test_table2
-                  Statistics: Num rows: 84 Data size: 15964 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 80 Data size: 15203 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int), concat(value, value) (type: string)
                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 80 Data size: 15203 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE
                    Map Join Operator
                      condition map:
                           Inner Join 0 to 1
@@ -930,16 +1095,16 @@ STAGE PLANS:
                      outputColumnNames: _col0, _col1, _col3
                      input vertices:
                        0 Map 1
-                      Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 84 Data size: 31248 Basic stats: COMPLETE Column stats: COMPLETE
                      Select Operator
                        expressions: _col0 (type: int), concat(_col1, _col3) (type: string)
                        outputColumnNames: _col0, _col1
-                        Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE
                        Reduce Output Operator
                          key expressions: _col0 (type: int)
                          sort order: +
                          Map-reduce partition columns: _col0 (type: int)
-                          Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE
                          value expressions: _col1 (type: string)
            Execution mode: llap
            LLAP IO: no inputs
@@ -949,15 +1114,51 @@ STAGE PLANS:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string)
                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
-                  Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.TextInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                      name: default.test_table3
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: string), '1' (type: string)
+                  outputColumnNames: key, value, ds
+                  Statistics: Num rows: 84 Data size: 22932 Basic stats: COMPLETE Column stats: COMPLETE
+                  Group By Operator
+                    aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                    keys: ds (type: string)
+                    mode: hash
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: string)
+                      Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE
+                      value expressions: _col1 (type: struct), _col2 (type: struct)
+        Reducer 4
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

   Stage: Stage-2
     Dependency Collection
@@ -977,6 +1178,10 @@ STAGE PLANS:
   Stage: Stage-3
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.test_table3

PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
SELECT a.key, concat(a.v1, b.v2)
@@ -1079,25 +1284,26 @@ STAGE PLANS:
      Edges:
        Map 2 <- Map 1 (BROADCAST_EDGE)
        Reducer 3 <- Map 2 (SIMPLE_EDGE)
+        Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
#### A masked pattern was here ####
      Vertices:
        Map 1
            Map Operator Tree:
                TableScan
                  alias: test_table1
-                  Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int), value (type: string)
                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE
                        value expressions: _col1 (type: string)
            Execution mode: llap
            LLAP IO: no inputs
@@ -1105,14 +1311,14 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: test_table2
-                  Statistics: Num rows: 84 Data size: 15964 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 80 Data size: 15203 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int), value (type: string)
                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 80 Data size: 15203 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE
                    Map Join Operator
                      condition map:
                           Inner Join 0 to 1
@@ -1122,16 +1328,16 @@ STAGE PLANS:
                      outputColumnNames: _col0, _col1, _col3
                      input vertices:
                        0 Map 1
-                      Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 84 Data size: 15540 Basic stats: COMPLETE Column stats: COMPLETE
                      Select Operator
                        expressions: (_col0 + _col0) (type: int), concat(_col1, _col3) (type: string)
                        outputColumnNames: _col0, _col1
-                        Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE
                        Reduce Output Operator
                          key expressions: _col0 (type: int)
                          sort order: +
                          Map-reduce partition columns: _col0 (type: int)
-                          Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE
                          value expressions: _col1 (type: string)
            Execution mode: llap
            LLAP IO: no inputs
@@ -1141,15 +1347,51 @@ STAGE PLANS:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string)
                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
-                  Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.TextInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                      name: default.test_table3
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: string), '1' (type: string)
+                  outputColumnNames: key, value, ds
+                  Statistics: Num rows: 84 Data size: 22932 Basic stats: COMPLETE Column stats: COMPLETE
+                  Group By Operator
+                    aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                    keys: ds (type: string)
+                    mode: hash
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: string)
+                      Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE
+                      value expressions: _col1 (type: struct), _col2 (type: struct)
+        Reducer 4
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

   Stage: Stage-2
     Dependency Collection
@@ -1169,6 +1411,10 @@ STAGE PLANS:
   Stage: Stage-3
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.test_table3

PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
SELECT a.key+a.key, concat(a.value, b.value)
diff --git ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_6.q.out ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_6.q.out
index f5f5f91e82..3ba8a65772 100644
--- ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_6.q.out
+++ ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_6.q.out
@@ -79,25 +79,26 @@ STAGE PLANS:
      Edges:
        Map 2 <- Map 1 (BROADCAST_EDGE)
        Reducer 3 <- Map 2 (SIMPLE_EDGE)
+        Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
#### A masked pattern was here ####
      Vertices:
        Map 1
            Map Operator Tree:
                TableScan
                  alias: a
-                  Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: (key is not null and key2 is not null) (type: boolean)
-                    Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int), key2 (type: int), value (type: string)
                      outputColumnNames: _col0, _col1, _col2
-                      Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int), _col1 (type: int)
                        sort order: ++
                        Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
-                        Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE
                        value expressions: _col2 (type: string)
            Execution mode: llap
            LLAP IO: no inputs
@@ -105,14 +106,14 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: b
-                  Statistics: Num rows: 84 Data size: 16531 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: (key is not null and key2 is not null) (type: boolean)
-                    Statistics: Num rows: 76 Data size: 14956 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int), key2 (type: int), value (type: string)
                      outputColumnNames: _col0, _col1, _col2
-                      Statistics: Num rows: 76 Data size: 14956 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 84 Data size: 15372 Basic stats: COMPLETE Column stats: COMPLETE
                    Map Join Operator
                      condition map:
                           Inner Join 0 to 1
@@ -122,16 +123,16 @@ STAGE PLANS:
                      outputColumnNames: _col0, _col1, _col2, _col6
                      input vertices:
                        0 Map 1
-                      Statistics: Num rows: 83 Data size: 16451 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 14 Data size: 2618 Basic stats: COMPLETE Column stats: COMPLETE
                      Select Operator
                        expressions: _col0 (type: int), _col1 (type: int), concat(_col2, _col6) (type: string)
                        outputColumnNames: _col0, _col1, _col2
-                        Statistics: Num rows: 83 Data size: 16451 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 14 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE
                        Reduce Output Operator
                          key expressions: _col0 (type: int), _col1 (type: int)
                          sort order: +-
                          Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
-                          Statistics: Num rows: 83 Data size: 16451 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 14 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE
                          value expressions: _col2 (type: string)
            Execution mode: llap
            LLAP IO: no inputs
@@ -141,15 +142,51 @@ STAGE PLANS:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: string)
                outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 83 Data size: 16451 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 14 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
-                  Statistics: Num rows: 83 Data size: 16451 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 14 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.TextInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                      name: default.test_table3
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), '1' (type: string)
+                  outputColumnNames: key, key2, value, ds
+                  Statistics: Num rows: 14 Data size: 3878 Basic stats: COMPLETE Column stats: COMPLETE
+                  Group By Operator
+                    aggregations: compute_stats(key, 'hll'), compute_stats(key2, 'hll'), compute_stats(value, 'hll')
+                    keys: ds (type: string)
+                    mode: hash
+                    outputColumnNames: _col0, _col1, _col2, _col3
+                    Statistics: Num rows: 1 Data size: 1373 Basic stats: COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: string)
+                      Statistics: Num rows: 1 Data size: 1373 Basic stats: COMPLETE Column stats: COMPLETE
+                      value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+        Reducer 4
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

   Stage: Stage-2
     Dependency Collection
@@ -169,6 +206,10 @@ STAGE PLANS:
   Stage: Stage-3
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, key2, value
+          Column Types: int, int, string
+          Table: default.test_table3

PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
SELECT a.key, a.key2, concat(a.value, b.value)
@@ -266,25 +307,26 @@ STAGE PLANS:
      Edges:
        Map 2 <- Map 1 (BROADCAST_EDGE)
        Reducer 3 <- Map 2 (SIMPLE_EDGE)
+        Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
#### A masked pattern was here ####
      Vertices:
        Map 1
            Map Operator Tree:
                TableScan
                  alias: a
-                  Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: (key is not null and key2 is not null) (type: boolean)
-                    Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int), key2 (type: int), value (type: string)
                      outputColumnNames: _col0, _col1, _col2
-                      Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int), _col1 (type: int)
                        sort order: ++
                        Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
-                        Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE
                        value expressions: _col2 (type: string)
            Execution mode: llap
            LLAP IO: no inputs
@@ -292,14 +334,14 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: b
-                  Statistics: Num rows: 84 Data size: 16531 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: (key is not null and key2 is not null) (type: boolean)
-                    Statistics: Num rows: 76 Data size: 14956 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int), key2 (type: int), value (type: string)
                      outputColumnNames: _col0, _col1, _col2
-                      Statistics: Num rows: 76 Data size: 14956 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 84 Data size: 15372 Basic stats: COMPLETE Column stats: COMPLETE
                    Map Join Operator
                      condition map:
                           Inner Join 0 to 1
@@ -309,16 +351,16 @@ STAGE PLANS:
                      outputColumnNames: _col0, _col1, _col2, _col6
                      input vertices:
                        0 Map 1
-                      Statistics: Num rows: 83 Data size: 16451 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 14 Data size: 2618 Basic stats: COMPLETE Column stats: COMPLETE
                      Select Operator
                        expressions: _col0 (type: int), _col1 (type: int), concat(_col2, _col6) (type: string)
                        outputColumnNames: _col0, _col1, _col2
-                        Statistics: Num rows: 83 Data size: 16451 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 14 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE
                        Reduce Output Operator
                          key expressions: _col0 (type: int), _col1 (type: int)
                          sort order: +-
                          Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
-                          Statistics: Num rows: 83 Data size: 16451 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 14 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE
                          value expressions: _col2 (type: string)
            Execution mode: llap
            LLAP IO: no inputs
@@ -328,15 +370,51 @@ STAGE PLANS:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: string)
                outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 83 Data size: 16451 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 14 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
-                  Statistics: Num rows: 83 Data size: 16451 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 14 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.TextInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                      name: default.test_table3
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), '1' (type: string)
+                  outputColumnNames: key, key2, value, ds
+                  Statistics: Num rows: 14 Data size: 3878 Basic stats: COMPLETE Column stats: COMPLETE
+                  Group By Operator
+                    aggregations: compute_stats(key, 'hll'), compute_stats(key2, 'hll'), compute_stats(value, 'hll')
+                    keys: ds (type: string)
+                    mode: hash
+                    outputColumnNames: _col0, _col1, _col2, _col3
+                    Statistics: Num rows: 1 Data size: 1373 Basic stats: COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: string)
+                      Statistics: Num rows: 1 Data size: 1373 Basic stats: COMPLETE Column stats: COMPLETE
+                      value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+        Reducer 4
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

   Stage: Stage-2
     Dependency Collection
@@ -356,6 +434,10 @@ STAGE PLANS:
   Stage: Stage-3
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, key2, value
+          Column Types: int, int, string
+          Table: default.test_table3

PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
SELECT subq1.key, subq1.key2, subq1.value from
@@ -453,25 +535,26 @@ STAGE PLANS:
      Edges:
        Map 2 <- Map 1 (BROADCAST_EDGE)
        Reducer 3 <- Map 2 (SIMPLE_EDGE)
+        Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
#### A masked pattern was here ####
      Vertices:
        Map 1
            Map Operator Tree:
                TableScan
                  alias: a
-                  Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: (key is not null and key2 is not null) (type: boolean)
-                    Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int), key2 (type: int), value (type: string)
                      outputColumnNames: _col0, _col1, _col2
-                      Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int), _col1 (type: int)
                        sort order: ++
                        Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
-                        Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE
                        value expressions: _col2 (type: string)
            Execution mode: llap
            LLAP IO: no inputs
@@ -479,14 +562,14 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: b
-                  Statistics: Num rows: 84 Data size: 16531 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: (key is not null and key2 is not null) (type: boolean)
-                    Statistics: Num rows: 76 Data size: 14956 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int), key2 (type: int), value (type: string)
                      outputColumnNames: _col0, _col1, _col2
-                      Statistics: Num rows: 76 Data size: 14956 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 84 Data size: 15372 Basic stats: COMPLETE Column stats: COMPLETE
                    Map Join Operator
                      condition map:
                           Inner Join 0 to 1
@@ -496,16 +579,16 @@ STAGE PLANS:
                      outputColumnNames: _col0, _col1, _col2, _col6
                      input vertices:
                        0 Map 1
-                      Statistics: Num rows: 83 Data size: 16451 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 14 Data size: 2618 Basic stats: COMPLETE Column stats: COMPLETE
                      Select Operator
                        expressions: _col1 (type: int), _col0 (type: int), concat(_col2, _col6) (type: string)
                        outputColumnNames: _col0, _col1, _col2
-                        Statistics: Num rows: 83 Data size: 16451 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 14 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE
                        Reduce Output Operator
                          key expressions: _col0 (type: int), _col1 (type: int)
                          sort order: +-
                          Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
-                          Statistics: Num rows: 83 Data size: 16451 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 14 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE
                          value expressions: _col2 (type: string)
            Execution mode: llap
            LLAP IO: no inputs
@@ -515,15 +598,51 @@ STAGE PLANS:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: string)
                outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 83 Data size: 16451 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 14 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
-                  Statistics: Num rows: 83 Data size: 16451 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 14 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.TextInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                      name: default.test_table3
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), '1' (type: string)
+                  outputColumnNames: key, key2, value, ds
+                  Statistics: Num rows: 14 Data size: 3878 Basic stats: COMPLETE Column stats: COMPLETE
+                  Group By Operator
+                    aggregations: compute_stats(key, 'hll'), compute_stats(key2, 'hll'), compute_stats(value, 'hll')
+                    keys: ds (type: string)
+                    mode: hash
+                    outputColumnNames: _col0, _col1, _col2, _col3
+                    Statistics: Num rows: 1 Data size: 1373 Basic stats: COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: string)
+                      Statistics: Num rows: 1 Data size: 1373 Basic stats: COMPLETE Column stats: COMPLETE
+                      value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+        Reducer 4
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

   Stage: Stage-2
     Dependency Collection
@@ -543,6 +662,10 @@ STAGE PLANS:
   Stage: Stage-3
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, key2, value
+          Column Types: int, int, string
+          Table: default.test_table3

PREHOOK: query: EXPLAIN
INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
@@ -575,25 +698,26 @@ STAGE PLANS:
      Edges:
        Map 2 <- Map 1 (BROADCAST_EDGE)
        Reducer 3 <- Map 2 (SIMPLE_EDGE)
+        Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
#### A masked pattern was here ####
      Vertices:
        Map 1
            Map Operator Tree:
                TableScan
                  alias: a
-                  Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: (key is not null and key2 is not null) (type: boolean)
-                    Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int), key2 (type: int), value (type: string)
                      outputColumnNames: _col0, _col1, _col2
-                      Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int), _col1 (type: int)
                        sort order: ++
                        Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
-                        Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE
                        value expressions: _col2 (type: string)
            Execution mode: llap
            LLAP IO: no inputs
@@ -601,14 +725,14 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: b
-                  Statistics: Num rows: 84 Data size: 16531 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: (key is not null and key2 is not null) (type: boolean)
-                    Statistics: Num rows: 76 Data size: 14956 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int), key2 (type: int), value (type: string)
                      outputColumnNames: _col0, _col1, _col2
-                      Statistics: Num rows: 76 Data size: 14956 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 84 Data size: 15372 Basic stats: COMPLETE Column stats: COMPLETE
                    Map Join Operator
                      condition map:
                           Inner Join 0 to 1
@@ -618,16 +742,16 @@ STAGE PLANS:
                      outputColumnNames: _col0, _col1, _col2, _col6
                      input vertices:
                        0 Map 1
-                      Statistics: Num rows: 83 Data size: 16451 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 14 Data size: 2618 Basic stats: COMPLETE Column stats: COMPLETE
                      Select Operator
                        expressions: _col1 (type: int), _col0 (type: int), concat(_col2, _col6) (type: string)
                        outputColumnNames: _col0, _col1, _col2
-                        Statistics: Num rows: 83 Data size: 16451 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 14 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE
                        Reduce Output Operator
                          key expressions: _col0 (type: int), _col1 (type: int)
                          sort order: +-
                          Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
-                          Statistics: Num rows: 83 Data size: 16451 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 14 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE
                          value expressions: _col2 (type: string)
            Execution mode: llap
            LLAP IO: no inputs
@@ -637,15 +761,51 @@ STAGE PLANS:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: string)
                outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 83 Data size: 16451 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 14 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
-                  Statistics: Num rows: 83 Data size: 16451 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 14 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.TextInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                      name: default.test_table3
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), '1' (type: string)
+                  outputColumnNames: key, key2, value, ds
+                  Statistics: Num rows: 14 Data size: 3878 Basic stats: COMPLETE Column stats: COMPLETE
+                  Group By Operator
+                    aggregations: compute_stats(key, 'hll'), compute_stats(key2, 'hll'), compute_stats(value, 'hll')
+                    keys: ds (type: string)
+                    mode: hash
+                    outputColumnNames: _col0, _col1, _col2, _col3
+                    Statistics: Num rows: 1 Data size: 1373 Basic stats: COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: string)
+                      Statistics: Num rows: 1 Data size: 1373 Basic stats: COMPLETE Column stats: COMPLETE
+                      value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+        Reducer 4
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

   Stage: Stage-2
     Dependency Collection
@@ -665,6 +825,10 @@ STAGE PLANS:
   Stage: Stage-3
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, key2, value
+          Column Types: int, int, string
+          Table: default.test_table3

PREHOOK: query: EXPLAIN
INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
@@ -703,25 +867,26 @@ STAGE PLANS:
      Edges:
        Map 2 <- Map 1 (BROADCAST_EDGE)
        Reducer 3 <- Map 2 (SIMPLE_EDGE)
+        Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
#### A masked pattern was here ####
      Vertices:
        Map 1
            Map Operator Tree:
                TableScan
                  alias: a
-                  Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: (key is not null and key2 is not null) (type: boolean)
-                    Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int), key2 (type: int), value (type: string)
                      outputColumnNames: _col0, _col1, _col2
-                      Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int), _col1 (type: int)
                        sort order: ++
                        Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
-                        Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE
                        value expressions: _col2 (type: string)
            Execution mode: llap
            LLAP IO: no inputs
@@ -729,14 +894,14 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: b
-                  Statistics: Num rows: 84 Data size: 16531 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: (key is not null and key2 is not null) (type: boolean)
-                    Statistics: Num rows: 76 Data size: 14956 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int), key2 (type: int), value (type: string)
                      outputColumnNames: _col0, _col1, _col2
-                      Statistics: Num rows: 76 Data size: 14956 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 84 Data size: 15372 Basic stats: COMPLETE Column stats: COMPLETE
                    Map Join Operator
                      condition map:
                           Inner Join 0 to 1
@@ -746,16 +911,16 @@ STAGE PLANS:
                      outputColumnNames: _col0, _col1, _col2, _col6
                      input vertices:
                        0 Map 1
-                      Statistics: Num rows: 83 Data size: 16451 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 14 Data size: 2618 Basic stats: COMPLETE Column stats: COMPLETE
                      Select Operator
                        expressions: _col0 (type: int), _col1 (type: int), concat(_col2, _col6) (type: string)
                        outputColumnNames: _col0, _col1, _col2
-                        Statistics: Num rows: 83 Data size: 16451 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 14 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE
                        Reduce Output Operator
                          key expressions: _col0 (type: int), _col1 (type: int)
                          sort order: +-
                          Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
-                          Statistics: Num rows: 83 Data size: 16451 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 14 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE
                          value expressions: _col2 (type: string)
            Execution mode: llap
            LLAP IO: no inputs
@@ -765,15 +930,51 @@ STAGE PLANS:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: string)
                outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 83 Data size: 16451 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 14 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
-                  Statistics: Num rows: 83 Data size: 16451 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 14 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.TextInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                      name: default.test_table3
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), '1' (type: string)
+                  outputColumnNames: key, key2, value, ds
+                  Statistics: Num rows: 14 Data size: 3878 Basic stats: COMPLETE Column stats: COMPLETE
+                  Group By Operator
+                    aggregations: compute_stats(key, 'hll'), compute_stats(key2, 'hll'), compute_stats(value, 'hll')
+                    keys: ds (type: string)
+                    mode: hash
+                    outputColumnNames: _col0, _col1, _col2, _col3
+                    Statistics: Num rows: 1 Data size: 1373 Basic stats: COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: string)
+                      Statistics: Num rows: 1 Data size: 1373 Basic stats: COMPLETE Column stats: COMPLETE
+                      value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+        Reducer 4
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

   Stage: Stage-2
     Dependency Collection
@@ -793,6 +994,10 @@ STAGE PLANS:
   Stage: Stage-3
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, key2, value
+          Column Types: int, int, string
+          Table: default.test_table3

PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
SELECT subq2.key, subq2.key2, subq2.value from
@@ -908,25 +1113,26 @@ STAGE PLANS:
      Edges:
        Map 2 <- Map 1 (BROADCAST_EDGE)
        Reducer 3 <- Map 2 (SIMPLE_EDGE)
+        Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
#### A masked pattern was here ####
      Vertices:
        Map 1
            Map Operator Tree:
                TableScan
                  alias: a
-                  Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: (key is not null and key2 is not null) (type: boolean)
-                    Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int), key2 (type: int), value (type: string)
                      outputColumnNames: _col0, _col1, _col2
-                      Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int), _col1 (type: int)
                        sort order: ++
                        Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
-                        Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE
                        value expressions: _col2 (type: string)
            Execution mode: llap
            LLAP IO: no inputs
@@ -934,14 +1140,14 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: b
-                  Statistics: Num rows: 84 Data size: 16531 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: (key is not null and key2 is not null) (type: boolean)
-                    Statistics: Num rows: 76 Data size: 14956 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int), key2 (type: int), value (type: string)
                      outputColumnNames: _col0, _col1, _col2
-                      Statistics: Num rows: 76 Data size: 14956 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 84 Data size: 15372 Basic stats: COMPLETE Column stats: COMPLETE
                    Map Join Operator
                      condition map:
                           Inner Join 0 to 1
@@ -951,16 +1157,16 @@ STAGE PLANS:
                      outputColumnNames: _col0, _col1, _col2, _col6
                      input vertices:
                        0 Map 1
-                      Statistics: Num rows: 83 Data size: 16451 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 14 Data size: 2618 Basic stats: COMPLETE Column stats: COMPLETE
                      Select Operator
                        expressions: _col0 (type: int), _col1 (type: int), concat(_col2, _col6) (type: string)
                        outputColumnNames: _col0, _col1, _col2
-                        Statistics: Num rows: 83 Data size: 16451 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 14 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE
                        Reduce Output Operator
                          key expressions: _col0 (type: int), _col1 (type: int)
                          sort order: +-
                          Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
-                          Statistics: Num rows: 83 Data size: 16451 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 14 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE
                          value expressions: _col2 (type: string)
            Execution mode: llap
            LLAP IO: no inputs
@@ -970,15 +1176,51 @@ STAGE PLANS:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: string)
                outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 83 Data size: 16451 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 14 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
-                  Statistics: Num rows: 83 Data size: 16451 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 14 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.TextInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                      name: default.test_table3
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), '1' (type: string)
+                  outputColumnNames: key, key2, value, ds
+                  Statistics: Num rows: 14 Data size: 3878 Basic stats: COMPLETE Column stats: COMPLETE
+                  Group By Operator
+                    aggregations: compute_stats(key, 'hll'), compute_stats(key2, 'hll'), compute_stats(value, 'hll')
+                    keys: ds (type: string)
+                    mode: hash
+                    outputColumnNames: _col0, _col1, _col2, _col3
+                    Statistics: Num rows: 1 Data size: 1373 Basic stats: COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: string)
+                      Statistics: Num rows: 1 Data size: 1373 Basic stats: COMPLETE Column stats: COMPLETE
+                      value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+        Reducer 4
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

   Stage: Stage-2
     Dependency Collection
@@ -998,6 +1240,10 @@ STAGE PLANS:
   Stage: Stage-3
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, key2, value
+          Column Types: int, int, string
+          Table: default.test_table3

PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
SELECT subq2.k2, subq2.k1, subq2.value from
@@ -1123,25 +1369,26 @@ STAGE PLANS:
      Edges:
        Map 2 <- Map 1 (BROADCAST_EDGE)
        Reducer 3 <- Map 2 (SIMPLE_EDGE)
+        Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
#### A masked pattern was here ####
      Vertices:
        Map 1
            Map Operator Tree:
                TableScan
                  alias: a
-                  Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: (key is not null and key2 is not null) (type: boolean)
-                    Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int), key2 (type: int), value (type: string)
                      outputColumnNames: _col0, _col1, _col2
-                      Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
(type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 10 Data size: 2011 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1149,14 +1396,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 84 Data size: 16531 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key is not null and key2 is not null) (type: boolean) - Statistics: Num rows: 76 Data size: 14956 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), key2 (type: int), value (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 76 Data size: 14956 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 15372 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -1166,16 +1413,16 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col6 input vertices: 0 Map 1 - Statistics: Num rows: 83 Data size: 16451 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 2618 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: int), concat(_col2, _col6) (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 83 Data size: 16451 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: -- Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 83 Data size: 16451 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1185,15 +1432,51 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 83 Data size: 16451 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 83 Data size: 16451 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table4 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), '1' (type: string) + outputColumnNames: key, key2, value, ds + Statistics: Num rows: 14 Data size: 3878 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(key2, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1373 Basic stats: COMPLETE Column stats: 
COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 1373 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1405 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1213,4 +1496,8 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, key2, value + Column Types: int, int, string + Table: default.test_table4 diff --git ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_7.q.out ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_7.q.out index 7b380562ac..0af21e0e26 100644 --- ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_7.q.out +++ ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_7.q.out @@ -79,25 +79,26 @@ STAGE PLANS: Edges: Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (((key = 0) or (key = 5)) and key is not null) (type: boolean) - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -105,14 +106,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 84 Data size: 15964 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (((key = 0) or (key = 5)) and key is not null) (type: boolean) - Statistics: Num rows: 6 Data size: 1140 Basic stats: 
COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 1140 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 358 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -122,16 +123,16 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col4 input vertices: 0 Map 1 - Statistics: Num rows: 11 Data size: 2145 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 366 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), concat(_col1, _col4) (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 2145 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 2145 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -141,15 +142,51 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 2145 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 11 Data size: 2145 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 2 Data size: 546 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + 
table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -169,6 +206,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.value, b.value) @@ -263,25 +304,26 @@ STAGE PLANS: Edges: Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: test_table1 - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (((key = 0) or (key = 5)) and key is not null) (type: boolean) - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -289,14 +331,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test_table2 - Statistics: Num rows: 84 Data size: 15964 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (((key = 0) or (key = 5)) and key is not null) (type: boolean) - Statistics: Num rows: 6 Data size: 1140 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 1140 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -306,16 +348,16 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col3 input vertices: 0 Map 1 - Statistics: Num rows: 11 Data size: 2145 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 366 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), concat(_col1, _col3) (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 2145 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 2145 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 
(type: string) Execution mode: llap LLAP IO: no inputs @@ -325,15 +367,51 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 2145 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 11 Data size: 2145 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 2 Data size: 546 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -353,6 +431,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.value, b.value) @@ -453,25 +535,26 @@ STAGE PLANS: Edges: Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: test_table1 - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (((key = 0) or (key = 5)) and (key < 8)) (type: boolean) - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 
- Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -479,14 +562,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test_table2 - Statistics: Num rows: 84 Data size: 15964 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (((key = 0) or (key = 5)) and (key < 8)) (type: boolean) - Statistics: Num rows: 10 Data size: 1900 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 1900 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -496,16 +579,16 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col3 input vertices: 0 Map 1 - Statistics: Num rows: 11 Data size: 2145 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 183 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), concat(_col1, _col3) (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 2145 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 2145 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -515,15 +598,51 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 2145 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 11 Data size: 2145 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 1 Data size: 273 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key 
expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -543,6 +662,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.value, b.value) diff --git ql/src/test/results/clientpositive/llap/cbo_rp_lineage2.q.out ql/src/test/results/clientpositive/llap/cbo_rp_lineage2.q.out index 44f9d68a7e..ed63ddd306 100644 --- ql/src/test/results/clientpositive/llap/cbo_rp_lineage2.q.out +++ ql/src/test/results/clientpositive/llap/cbo_rp_lineage2.q.out @@ -36,7 +36,7 @@ PREHOOK: query: insert into table dest1 select * from src2 PREHOOK: type: QUERY PREHOOK: Input: default@src2 PREHOOK: Output: default@dest1 -{"version":"1.0","engine":"tez","database":"default","hash":"ecc718a966d8887b18084a55dd96f0bc","queryText":"insert into table dest1 select * from src2","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest1.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest1.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"ecc718a966d8887b18084a55dd96f0bc","queryText":"insert into table dest1 select * from src2","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[2],"targets":[0],"expression":"compute_stats(default.src2.key2, 'hll')","edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"expression":"compute_stats(default.src2.value2, 'hll')","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest1.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest1.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} PREHOOK: query: select key k, dest1.value from dest1 PREHOOK: type: QUERY PREHOOK: Input: default@dest1 @@ -467,20 +467,20 @@ PREHOOK: type: QUERY PREHOOK: Input: default@src1 PREHOOK: Input: default@src2 PREHOOK: Output: default@dest2 
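The lineage records just above and below show the practical effect of autogathering on the post-execution hook output: each written column picks up an extra PROJECTION edge whose expression is compute_stats(<source column>, 'hll'), next to the original projection edge for the same target, and several previously empty "edges":[] records are now populated. A minimal Java sketch for pulling those edges back out of one captured lineage line follows; it assumes Jackson on the classpath, the class and method names are invented for illustration, and only the JSON field names ("edges", "edgeType", "expression", "targets", "vertices", "vertexId") come from the output shown here.

    import com.fasterxml.jackson.databind.JsonNode;
    import com.fasterxml.jackson.databind.ObjectMapper;

    // Illustrative sketch, not Hive code: list the compute_stats() projection
    // edges in one lineage record such as the {"version":"1.0",...} lines here.
    public class LineageStatsEdges {
      public static void main(String[] args) throws Exception {
        JsonNode root = new ObjectMapper().readTree(args[0]);
        JsonNode vertices = root.path("vertices");
        for (JsonNode edge : root.path("edges")) {
          String expr = edge.path("expression").asText("");
          if ("PROJECTION".equals(edge.path("edgeType").asText())
              && expr.startsWith("compute_stats(")) {
            for (JsonNode target : edge.path("targets")) {
              // In the records above, vertex ids match their array positions.
              String col = vertices.get(target.asInt()).path("vertexId").asText();
              System.out.println(col + " <- " + expr);
            }
          }
        }
      }
    }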
-{"version":"1.0","engine":"tez","database":"default","hash":"e494b771d94800dc3430bf5d0810cd9f","queryText":"insert overwrite table dest2 select * from src1 JOIN src2 ON src1.key = src2.key2","edges":[],"vertices":[]} +{"version":"1.0","engine":"tez","database":"default","hash":"e494b771d94800dc3430bf5d0810cd9f","queryText":"insert overwrite table dest2 select * from src1 JOIN src2 ON src1.key = src2.key2","edges":[{"sources":[4],"targets":[0],"expression":"compute_stats(default.src1.key, 'hll')","edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"expression":"compute_stats(default.src1.value, 'hll')","edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"expression":"compute_stats(default.src2.key2, 'hll')","edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"expression":"compute_stats(default.src2.value2, 'hll')","edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1,2,3],"expression":"src1.key is not null","edgeType":"PREDICATE"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2,3],"expression":"src2.key2 is not null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} PREHOOK: query: insert into table dest2 select * from src1 JOIN src2 ON src1.key = src2.key2 PREHOOK: type: QUERY PREHOOK: Input: default@src1 PREHOOK: Input: default@src2 PREHOOK: Output: default@dest2 -{"version":"1.0","engine":"tez","database":"default","hash":"efeaddd0d36105b1013b414627850dc2","queryText":"insert into table dest2 select * from src1 JOIN src2 ON src1.key = src2.key2","edges":[],"vertices":[]} +{"version":"1.0","engine":"tez","database":"default","hash":"efeaddd0d36105b1013b414627850dc2","queryText":"insert into table dest2 select * from src1 JOIN src2 ON src1.key = src2.key2","edges":[{"sources":[4],"targets":[0],"expression":"compute_stats(default.src1.key, 'hll')","edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"expression":"compute_stats(default.src1.value, 'hll')","edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"expression":"compute_stats(default.src2.key2, 'hll')","edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"expression":"compute_stats(default.src2.value2, 'hll')","edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1,2,3],"expression":"src1.key is not null","edgeType":"PREDICATE"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2,3],"expression":"src2.key2 is not null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} PREHOOK: query: insert into 
table dest2 select * from src1 JOIN src2 ON length(src1.value) = length(src2.value2) + 1 PREHOOK: type: QUERY PREHOOK: Input: default@src1 PREHOOK: Input: default@src2 PREHOOK: Output: default@dest2 -{"version":"1.0","engine":"tez","database":"default","hash":"e9450a56b3d103642e06bef0e4f0d482","queryText":"insert into table dest2\n select * from src1 JOIN src2 ON length(src1.value) = length(src2.value2) + 1","edges":[],"vertices":[]} +{"version":"1.0","engine":"tez","database":"default","hash":"e9450a56b3d103642e06bef0e4f0d482","queryText":"insert into table dest2\n select * from src1 JOIN src2 ON length(src1.value) = length(src2.value2) + 1","edges":[{"sources":[4],"targets":[0],"expression":"compute_stats(default.src1.key, 'hll')","edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"expression":"compute_stats(default.src1.value, 'hll')","edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"expression":"compute_stats(default.src2.key2, 'hll')","edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"expression":"compute_stats(default.src2.value2, 'hll')","edgeType":"PROJECTION"},{"sources":[5],"targets":[0,1,2,3],"expression":"length(src1.value) is not null","edgeType":"PREDICATE"},{"sources":[5,7],"targets":[0,1,2,3],"expression":"(length(src1.value) = (length(src2.value2) + 1))","edgeType":"PREDICATE"},{"sources":[7],"targets":[0,1,2,3],"expression":"length(src2.value2) is not null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} PREHOOK: query: select * from src1 where length(key) > 2 PREHOOK: type: QUERY PREHOOK: Input: default@src1 @@ -530,7 +530,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@src1 PREHOOK: Input: default@src2 PREHOOK: Output: default@dest2 -{"version":"1.0","engine":"tez","database":"default","hash":"76d84512204ddc576ad4d93f252e4358","queryText":"insert overwrite table dest2\n select * from src1 JOIN src2 ON src1.key = src2.key2 WHERE length(key) > 3","edges":[],"vertices":[]} +{"version":"1.0","engine":"tez","database":"default","hash":"76d84512204ddc576ad4d93f252e4358","queryText":"insert overwrite table dest2\n select * from src1 JOIN src2 ON src1.key = src2.key2 WHERE length(key) > 3","edges":[{"sources":[4],"targets":[0],"expression":"compute_stats(default.src1.key, 'hll')","edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"expression":"compute_stats(default.src1.value, 'hll')","edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"expression":"compute_stats(default.src2.key2, 'hll')","edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"expression":"compute_stats(default.src2.value2, 'hll')","edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1,2,3],"expression":"((length(src1.key) > 3) and src1.key is not null)","edgeType":"PREDICATE"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2,3],"expression":"((length(src2.key2) > 3) and src2.key2 is not 
null)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} PREHOOK: query: drop table if exists dest_l1 PREHOOK: type: DROPTABLE PREHOOK: query: CREATE TABLE dest_l1(key INT, value STRING) STORED AS TEXTFILE @@ -552,7 +552,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Input: default@src1 PREHOOK: Output: default@dest_l1 -{"version":"1.0","engine":"tez","database":"default","hash":"60b589744e2527dd235a6c8168d6a653","queryText":"INSERT OVERWRITE TABLE dest_l1\nSELECT j.*\nFROM (SELECT t1.key, p1.value\n FROM src1 t1\n LEFT OUTER JOIN src p1\n ON (t1.key = p1.key)\n UNION ALL\n SELECT t2.key, p2.value\n FROM src1 t2\n LEFT OUTER JOIN src p2\n ON (t2.key = p2.key)) j","edges":[{"sources":[2],"targets":[0],"expression":"UDFToInteger(key)","edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"expression":"value","edgeType":"PROJECTION"},{"sources":[4,2],"targets":[0,1],"expression":"(j-subquery1:_u1-subquery1:p1.key = j-subquery1:_u1-subquery1:t1.key)","edgeType":"PREDICATE"},{"sources":[4,2],"targets":[0,1],"expression":"(j-subquery2:_u1-subquery2:p2.key = j-subquery2:_u1-subquery2:t2.key)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l1.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l1.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src.value"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src.key"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"60b589744e2527dd235a6c8168d6a653","queryText":"INSERT OVERWRITE TABLE dest_l1\nSELECT j.*\nFROM (SELECT t1.key, p1.value\n FROM src1 t1\n LEFT OUTER JOIN src p1\n ON (t1.key = p1.key)\n UNION ALL\n SELECT t2.key, p2.value\n FROM src1 t2\n LEFT OUTER JOIN src p2\n ON (t2.key = p2.key)) j","edges":[{"sources":[2],"targets":[0],"expression":"UDFToInteger(key)","edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"expression":"value","edgeType":"PROJECTION"},{"sources":[4,2],"targets":[0,1],"expression":"(j-subquery1:_u1-subquery1:p1.key = j-subquery1:_u1-subquery1:t1.key)","edgeType":"PREDICATE"},{"sources":[4,2],"targets":[0,1],"expression":"(j-subquery2:_u1-subquery2:p2.key = j-subquery2:_u1-subquery2:t2.key)","edgeType":"PREDICATE"},{"sources":[2],"targets":[0],"expression":"compute_stats(UDFToInteger(key), 'hll')","edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"expression":"compute_stats(value, 'hll')","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l1.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l1.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src.value"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src.key"}]} PREHOOK: query: drop table if exists emp PREHOOK: type: DROPTABLE PREHOOK: query: drop table if exists dept @@ -593,7 +593,7 @@ PREHOOK: Input: default@dept PREHOOK: Input: default@emp PREHOOK: Input: default@project PREHOOK: Output: default@tgt 
-{"version":"1.0","engine":"tez","database":"default","hash":"f59797e0422d2e51515063374dfac361","queryText":"INSERT INTO TABLE tgt\nSELECT emd.dept_name, emd.name, emd.emp_id, emd.mgr_id, p.project_id, p.project_name\nFROM (\n SELECT d.dept_name, em.name, em.emp_id, em.mgr_id, em.dept_id\n FROM (\n SELECT e.name, e.dept_id, e.emp_id emp_id, m.emp_id mgr_id\n FROM emp e JOIN emp m ON e.emp_id = m.emp_id\n ) em\n JOIN dept d ON d.dept_id = em.dept_id\n ) emd JOIN project p ON emd.dept_id = p.project_id","edges":[{"sources":[6],"targets":[0],"edgeType":"PROJECTION"},{"sources":[7],"targets":[1],"edgeType":"PROJECTION"},{"sources":[8],"targets":[2,3],"edgeType":"PROJECTION"},{"sources":[9],"targets":[4],"edgeType":"PROJECTION"},{"sources":[10],"targets":[5],"edgeType":"PROJECTION"},{"sources":[8,11],"targets":[0,1,2,3,4,5],"expression":"(e.emp_id is not null and e.dept_id is not null)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3,4,5],"expression":"(emd:em:e.emp_id = emd:em:m.emp_id)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3,4,5],"expression":"m.emp_id is not null","edgeType":"PREDICATE"},{"sources":[11,12,9],"targets":[0,1,2,3,4,5],"expression":"(emd:em:e.dept_id = emd:d.dept_id AND emd:em:e.dept_id = p.project_id)","edgeType":"PREDICATE"},{"sources":[12],"targets":[0,1,2,3,4,5],"expression":"d.dept_id is not null","edgeType":"PREDICATE"},{"sources":[9],"targets":[0,1,2,3,4,5],"expression":"p.project_id is not null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.tgt.dept_name"},{"id":1,"vertexType":"COLUMN","vertexId":"default.tgt.name"},{"id":2,"vertexType":"COLUMN","vertexId":"default.tgt.emp_id"},{"id":3,"vertexType":"COLUMN","vertexId":"default.tgt.mgr_id"},{"id":4,"vertexType":"COLUMN","vertexId":"default.tgt.proj_id"},{"id":5,"vertexType":"COLUMN","vertexId":"default.tgt.proj_name"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dept.dept_name"},{"id":7,"vertexType":"COLUMN","vertexId":"default.emp.name"},{"id":8,"vertexType":"COLUMN","vertexId":"default.emp.emp_id"},{"id":9,"vertexType":"COLUMN","vertexId":"default.project.project_id"},{"id":10,"vertexType":"COLUMN","vertexId":"default.project.project_name"},{"id":11,"vertexType":"COLUMN","vertexId":"default.emp.dept_id"},{"id":12,"vertexType":"COLUMN","vertexId":"default.dept.dept_id"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"f59797e0422d2e51515063374dfac361","queryText":"INSERT INTO TABLE tgt\nSELECT emd.dept_name, emd.name, emd.emp_id, emd.mgr_id, p.project_id, p.project_name\nFROM (\n SELECT d.dept_name, em.name, em.emp_id, em.mgr_id, em.dept_id\n FROM (\n SELECT e.name, e.dept_id, e.emp_id emp_id, m.emp_id mgr_id\n FROM emp e JOIN emp m ON e.emp_id = m.emp_id\n ) em\n JOIN dept d ON d.dept_id = em.dept_id\n ) emd JOIN project p ON emd.dept_id = p.project_id","edges":[{"sources":[6],"targets":[0],"edgeType":"PROJECTION"},{"sources":[7],"targets":[1],"edgeType":"PROJECTION"},{"sources":[8],"targets":[2,3],"edgeType":"PROJECTION"},{"sources":[9],"targets":[4],"edgeType":"PROJECTION"},{"sources":[10],"targets":[5],"edgeType":"PROJECTION"},{"sources":[8,11],"targets":[0,1,2,3,4,5],"expression":"(e.emp_id is not null and e.dept_id is not null)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3,4,5],"expression":"(emd:em:e.emp_id = emd:em:m.emp_id)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3,4,5],"expression":"m.emp_id is not 
null","edgeType":"PREDICATE"},{"sources":[11,12,9],"targets":[0,1,2,3,4,5],"expression":"(emd:em:e.dept_id = emd:d.dept_id AND emd:em:e.dept_id = p.project_id)","edgeType":"PREDICATE"},{"sources":[12],"targets":[0,1,2,3,4,5],"expression":"d.dept_id is not null","edgeType":"PREDICATE"},{"sources":[9],"targets":[0,1,2,3,4,5],"expression":"p.project_id is not null","edgeType":"PREDICATE"},{"sources":[6],"targets":[0],"expression":"compute_stats(default.dept.dept_name, 'hll')","edgeType":"PROJECTION"},{"sources":[7],"targets":[1],"expression":"compute_stats(default.emp.name, 'hll')","edgeType":"PROJECTION"},{"sources":[8],"targets":[2,3],"expression":"compute_stats(default.emp.emp_id, 'hll')","edgeType":"PROJECTION"},{"sources":[9],"targets":[4],"expression":"compute_stats(default.project.project_id, 'hll')","edgeType":"PROJECTION"},{"sources":[10],"targets":[5],"expression":"compute_stats(default.project.project_name, 'hll')","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.tgt.dept_name"},{"id":1,"vertexType":"COLUMN","vertexId":"default.tgt.name"},{"id":2,"vertexType":"COLUMN","vertexId":"default.tgt.emp_id"},{"id":3,"vertexType":"COLUMN","vertexId":"default.tgt.mgr_id"},{"id":4,"vertexType":"COLUMN","vertexId":"default.tgt.proj_id"},{"id":5,"vertexType":"COLUMN","vertexId":"default.tgt.proj_name"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dept.dept_name"},{"id":7,"vertexType":"COLUMN","vertexId":"default.emp.name"},{"id":8,"vertexType":"COLUMN","vertexId":"default.emp.emp_id"},{"id":9,"vertexType":"COLUMN","vertexId":"default.project.project_id"},{"id":10,"vertexType":"COLUMN","vertexId":"default.project.project_name"},{"id":11,"vertexType":"COLUMN","vertexId":"default.emp.dept_id"},{"id":12,"vertexType":"COLUMN","vertexId":"default.dept.dept_id"}]} PREHOOK: query: drop table if exists dest_l2 PREHOOK: type: DROPTABLE PREHOOK: query: create table dest_l2 (id int, c1 tinyint, c2 int, c3 bigint) stored as textfile @@ -603,7 +603,7 @@ PREHOOK: Output: default@dest_l2 PREHOOK: query: insert into dest_l2 values(0, 1, 100, 10000) PREHOOK: type: QUERY PREHOOK: Output: default@dest_l2 -{"version":"1.0","engine":"tez","database":"default","hash":"e001334e3f8384806b0f25a7c303045f","queryText":"insert into dest_l2 values(0, 1, 100, 10000)","edges":[{"sources":[],"targets":[0],"expression":"UDFToInteger(tmp_values_col1)","edgeType":"PROJECTION"},{"sources":[],"targets":[1],"expression":"UDFToByte(tmp_values_col2)","edgeType":"PROJECTION"},{"sources":[],"targets":[2],"expression":"UDFToInteger(tmp_values_col3)","edgeType":"PROJECTION"},{"sources":[],"targets":[3],"expression":"UDFToLong(tmp_values_col4)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l2.id"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l2.c1"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_l2.c2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_l2.c3"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"e001334e3f8384806b0f25a7c303045f","queryText":"insert into dest_l2 values(0, 1, 100, 
10000)","edges":[{"sources":[],"targets":[0],"expression":"UDFToInteger(tmp_values_col1)","edgeType":"PROJECTION"},{"sources":[],"targets":[1],"expression":"UDFToByte(tmp_values_col2)","edgeType":"PROJECTION"},{"sources":[],"targets":[2],"expression":"UDFToInteger(tmp_values_col3)","edgeType":"PROJECTION"},{"sources":[],"targets":[3],"expression":"UDFToLong(tmp_values_col4)","edgeType":"PROJECTION"},{"sources":[],"targets":[0],"expression":"compute_stats(UDFToInteger(tmp_values_col1), 'hll')","edgeType":"PROJECTION"},{"sources":[],"targets":[1],"expression":"compute_stats(UDFToByte(tmp_values_col2), 'hll')","edgeType":"PROJECTION"},{"sources":[],"targets":[2],"expression":"compute_stats(UDFToInteger(tmp_values_col3), 'hll')","edgeType":"PROJECTION"},{"sources":[],"targets":[3],"expression":"compute_stats(UDFToLong(tmp_values_col4), 'hll')","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l2.id"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l2.c1"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_l2.c2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_l2.c3"}]} PREHOOK: query: select * from ( select c1 + c2 x from dest_l2 union all @@ -623,7 +623,7 @@ PREHOOK: Output: default@dest_l3 PREHOOK: query: insert into dest_l3 values(0, "s1", "s2", 15) PREHOOK: type: QUERY PREHOOK: Output: default@dest_l3 -{"version":"1.0","engine":"tez","database":"default","hash":"09df51ba6ba2d07f2304523ee505f094","queryText":"insert into dest_l3 values(0, \"s1\", \"s2\", 15)","edges":[{"sources":[],"targets":[0],"expression":"UDFToInteger(tmp_values_col1)","edgeType":"PROJECTION"},{"sources":[],"targets":[1,2],"edgeType":"PROJECTION"},{"sources":[],"targets":[3],"expression":"UDFToInteger(tmp_values_col4)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l3.id"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l3.c1"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_l3.c2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_l3.c3"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"09df51ba6ba2d07f2304523ee505f094","queryText":"insert into dest_l3 values(0, \"s1\", \"s2\", 15)","edges":[{"sources":[],"targets":[0],"expression":"UDFToInteger(tmp_values_col1)","edgeType":"PROJECTION"},{"sources":[],"targets":[1,2],"edgeType":"PROJECTION"},{"sources":[],"targets":[3],"expression":"UDFToInteger(tmp_values_col4)","edgeType":"PROJECTION"},{"sources":[],"targets":[0],"expression":"compute_stats(UDFToInteger(tmp_values_col1), 'hll')","edgeType":"PROJECTION"},{"sources":[],"targets":[1],"expression":"compute_stats(default.values__tmp__table__2.tmp_values_col2, 'hll')","edgeType":"PROJECTION"},{"sources":[],"targets":[2],"expression":"compute_stats(default.values__tmp__table__2.tmp_values_col3, 'hll')","edgeType":"PROJECTION"},{"sources":[],"targets":[3],"expression":"compute_stats(UDFToInteger(tmp_values_col4), 'hll')","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l3.id"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l3.c1"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_l3.c2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_l3.c3"}]} PREHOOK: query: select sum(a.c1) over (partition by a.c1 order by a.id) from dest_l2 a where a.c2 != 10 diff --git ql/src/test/results/clientpositive/llap/column_access_stats.q.out ql/src/test/results/clientpositive/llap/column_access_stats.q.out index 4cdb486108..1baa7948f8 100644 
--- ql/src/test/results/clientpositive/llap/column_access_stats.q.out +++ ql/src/test/results/clientpositive/llap/column_access_stats.q.out @@ -406,19 +406,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 5 Data size: 920 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 5 Data size: 920 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 920 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 5 Data size: 920 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -431,10 +431,10 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 1012 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 467 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 1012 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 467 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -538,19 +538,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 5 Data size: 1840 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((UDFToDouble(val) = 3.0) and key is not null) (type: boolean) - Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), val (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -564,10 +564,10 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 374 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 374 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -639,19 +639,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 5 Data size: 1840 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((UDFToDouble(key) = 6.0) and val is not null) (type: boolean) - Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: val (type: string) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -664,10 +664,10 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 187 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 187 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -734,19 +734,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 5 Data size: 920 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 5 Data size: 920 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 920 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 5 Data size: 920 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 4 @@ -772,19 +772,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t3 - Statistics: Num rows: 5 Data size: 1840 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 865 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 5 Data size: 1840 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 865 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), val (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 1840 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 865 Basic stats: COMPLETE Column stats: COMPLETE 
Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 5 Data size: 1840 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 865 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -798,16 +798,16 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col1 - Statistics: Num rows: 5 Data size: 1012 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 467 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 1012 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 467 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 5 Data size: 1012 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 467 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -818,10 +818,10 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5 Data size: 1113 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 513 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 1113 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 513 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/column_table_stats_orc.q.out ql/src/test/results/clientpositive/llap/column_table_stats_orc.q.out index 6e71803655..56b376ee03 100644 --- ql/src/test/results/clientpositive/llap/column_table_stats_orc.q.out +++ ql/src/test/results/clientpositive/llap/column_table_stats_orc.q.out @@ -35,7 +35,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 1 rawDataSize 170 @@ -72,22 +72,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: s - Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE Statistics Aggregation Key Prefix: default.s/ GatherStats: true Select Operator expressions: key (type: string), value (type: string) outputColumnNames: key, value - Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 880 Basic 
stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: _col0 (type: struct), _col1 (type: struct) auto parallelism: false @@ -102,7 +102,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -123,7 +123,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -152,13 +152,13 @@ STAGE PLANS: aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -321,13 +321,13 @@ STAGE PLANS: Map Operator Tree: TableScan alias: spart - Statistics: Num rows: 2 Data size: 1812 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 2 Data size: 1076 Basic stats: COMPLETE Column stats: PARTIAL Statistics Aggregation Key Prefix: default.spart/ GatherStats: true Select Operator expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: key, value, ds, hr - Statistics: Num rows: 2 Data size: 1812 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 2 Data size: 1076 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') keys: ds (type: string), hr (type: string) @@ -357,7 +357,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 #### A masked pattern was here #### name default.spart @@ -401,7 +401,7 @@ STAGE PLANS: ds 2008-04-08 hr 12 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 #### A masked pattern was here #### name default.spart @@ -715,13 +715,13 @@ STAGE PLANS: Map Operator Tree: TableScan alias: spart - Statistics: Num rows: 1 Data size: 722 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 354 Basic stats: COMPLETE Column stats: PARTIAL Statistics Aggregation Key Prefix: default.spart/ GatherStats: true Select Operator expressions: key (type: string), value (type: string), ds (type: string) outputColumnNames: key, value, ds - Statistics: Num rows: 1 Data size: 722 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 354 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: 
compute_stats(key, 'hll'), compute_stats(value, 'hll') keys: ds (type: string), '11' (type: string) @@ -751,7 +751,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 #### A masked pattern was here #### name default.spart @@ -947,7 +947,7 @@ Database: default Table: spart #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 1 rawDataSize 170 diff --git ql/src/test/results/clientpositive/llap/columnstats_part_coltype.q.out ql/src/test/results/clientpositive/llap/columnstats_part_coltype.q.out index 72f9aa2295..fff076e6a7 100644 --- ql/src/test/results/clientpositive/llap/columnstats_part_coltype.q.out +++ ql/src/test/results/clientpositive/llap/columnstats_part_coltype.q.out @@ -116,16 +116,36 @@ PREHOOK: Input: default@partcolstats POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=2, part='partB') key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats -# col_name data_type comment -key int from deserializer +col_name key +data_type int +min 27 +max 484 +num_nulls 0 +distinct_count 20 +avg_col_len +max_col_len +num_trues +num_falses +bitVector HL +comment from deserializer PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=2, part='partB') value PREHOOK: type: DESCTABLE PREHOOK: Input: default@partcolstats POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=2, part='partB') value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats -# col_name data_type comment -value string from deserializer +col_name value +data_type string +min +max +num_nulls 0 +distinct_count 20 +avg_col_len 6.8 +max_col_len 7 +num_trues +num_falses +bitVector HL +comment from deserializer PREHOOK: query: analyze table partcolstats partition (ds=date '2015-04-02', hr=2, part) compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@partcolstats @@ -186,16 +206,36 @@ PREHOOK: Input: default@partcolstats POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=3, part='partA') key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats -# col_name data_type comment -key int from deserializer +col_name key +data_type int +min 27 +max 495 +num_nulls 0 +distinct_count 30 +avg_col_len +max_col_len +num_trues +num_falses +bitVector HL +comment from deserializer PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=3, part='partA') value PREHOOK: type: DESCTABLE PREHOOK: Input: default@partcolstats POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=3, part='partA') value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats -# col_name data_type comment -value string from deserializer +col_name value +data_type string +min +max +num_nulls 0 +distinct_count 30 +avg_col_len 6.833333333333333 +max_col_len 7 +num_trues +num_falses +bitVector HL +comment from deserializer PREHOOK: query: analyze table partcolstats partition (ds=date '2015-04-02', hr, part) compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@partcolstats @@ -260,32 +300,72 @@ PREHOOK: Input: default@partcolstats POSTHOOK: query: describe formatted partcolstats 
partition (ds=date '2015-04-03', hr=3, part='partA') key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats -# col_name data_type comment -key int from deserializer +col_name key +data_type int +min 15 +max 495 +num_nulls 0 +distinct_count 40 +avg_col_len +max_col_len +num_trues +num_falses +bitVector HL +comment from deserializer PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partA') value PREHOOK: type: DESCTABLE PREHOOK: Input: default@partcolstats POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partA') value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats -# col_name data_type comment -value string from deserializer +col_name value +data_type string +min +max +num_nulls 0 +distinct_count 40 +avg_col_len 6.825 +max_col_len 7 +num_trues +num_falses +bitVector HL +comment from deserializer PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partB') key PREHOOK: type: DESCTABLE PREHOOK: Input: default@partcolstats POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partB') key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats -# col_name data_type comment -key int from deserializer +col_name key +data_type int +min 15 +max 495 +num_nulls 0 +distinct_count 58 +avg_col_len +max_col_len +num_trues +num_falses +bitVector HL +comment from deserializer PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partB') value PREHOOK: type: DESCTABLE PREHOOK: Input: default@partcolstats POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partB') value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats -# col_name data_type comment -value string from deserializer +col_name value +data_type string +min +max +num_nulls 0 +distinct_count 58 +avg_col_len 6.883333333333334 +max_col_len 7 +num_trues +num_falses +bitVector HL +comment from deserializer PREHOOK: query: analyze table partcolstats partition (ds, hr, part) compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@partcolstats diff --git ql/src/test/results/clientpositive/llap/constprog_semijoin.q.out ql/src/test/results/clientpositive/llap/constprog_semijoin.q.out index 7a486153d5..3be74ecfd6 100644 --- ql/src/test/results/clientpositive/llap/constprog_semijoin.q.out +++ ql/src/test/results/clientpositive/llap/constprog_semijoin.q.out @@ -62,30 +62,30 @@ Stage-0 Stage-1 Reducer 2 llap File Output Operator [FS_12] - Select Operator [SEL_11] (rows=5 width=413) + Select Operator [SEL_11] (rows=1 width=185) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_17] (rows=5 width=413) + Merge Join Operator [MERGEJOIN_17] (rows=1 width=94) Conds:RS_8._col3=RS_9._col0(Left Semi),Output:["_col0","_col2"] <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_8] PartitionCols:_col3 - Select Operator [SEL_2] (rows=5 width=376) + Select Operator [SEL_2] (rows=2 width=189) Output:["_col0","_col2","_col3"] - Filter Operator [FIL_15] (rows=5 width=376) + Filter Operator [FIL_15] (rows=2 width=189) predicate:((val = 't1val01') and dimid is not null) - TableScan [TS_0] (rows=10 width=376) - default@table1,table1,Tbl:COMPLETE,Col:NONE,Output:["id","val","val1","dimid"] + TableScan [TS_0] (rows=10 width=189) + default@table1,table1,Tbl:COMPLETE,Col:COMPLETE,Output:["id","val","val1","dimid"] <-Map 3 [SIMPLE_EDGE] llap SHUFFLE [RS_9] 
PartitionCols:_col0 - Group By Operator [GBY_7] (rows=5 width=4) + Group By Operator [GBY_7] (rows=2 width=4) Output:["_col0"],keys:_col0 Select Operator [SEL_5] (rows=5 width=4) Output:["_col0"] Filter Operator [FIL_16] (rows=5 width=4) predicate:id is not null TableScan [TS_3] (rows=5 width=4) - default@table3,table3,Tbl:COMPLETE,Col:NONE,Output:["id"] + default@table3,table3,Tbl:COMPLETE,Col:COMPLETE,Output:["id"] PREHOOK: query: select table1.id, table1.val, table1.val1 from table1 left semi join table3 on table1.dimid = table3.id where table1.val = 't1val01' PREHOOK: type: QUERY @@ -116,44 +116,44 @@ Stage-0 Stage-1 Reducer 3 llap File Output Operator [FS_18] - Select Operator [SEL_17] (rows=5 width=232) + Select Operator [SEL_17] (rows=1 width=187) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_28] (rows=5 width=232) + Merge Join Operator [MERGEJOIN_28] (rows=1 width=96) Conds:RS_14._col2=RS_15._col0(Left Semi),Output:["_col0","_col4"] <-Map 5 [SIMPLE_EDGE] llap SHUFFLE [RS_15] PartitionCols:_col0 - Group By Operator [GBY_13] (rows=5 width=4) + Group By Operator [GBY_13] (rows=2 width=4) Output:["_col0"],keys:_col0 Select Operator [SEL_8] (rows=5 width=4) Output:["_col0"] Filter Operator [FIL_26] (rows=5 width=4) predicate:id is not null TableScan [TS_6] (rows=5 width=4) - default@table3,table3,Tbl:COMPLETE,Col:NONE,Output:["id"] + default@table3,table3,Tbl:COMPLETE,Col:COMPLETE,Output:["id"] <-Reducer 2 [SIMPLE_EDGE] llap SHUFFLE [RS_14] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_27] (rows=5 width=211) + Merge Join Operator [MERGEJOIN_27] (rows=2 width=100) Conds:RS_9._col0=RS_10._col0(Inner),Output:["_col0","_col2","_col4"] <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_9] PartitionCols:_col0 - Select Operator [SEL_2] (rows=5 width=192) + Select Operator [SEL_2] (rows=2 width=99) Output:["_col0","_col2"] - Filter Operator [FIL_24] (rows=5 width=192) + Filter Operator [FIL_24] (rows=2 width=99) predicate:((val = 't1val01') and dimid is not null and id is not null) - TableScan [TS_0] (rows=10 width=192) - default@table1,table1,Tbl:COMPLETE,Col:NONE,Output:["id","val","dimid"] + TableScan [TS_0] (rows=10 width=99) + default@table1,table1,Tbl:COMPLETE,Col:COMPLETE,Output:["id","val","dimid"] <-Map 4 [SIMPLE_EDGE] llap SHUFFLE [RS_10] PartitionCols:_col0 - Select Operator [SEL_5] (rows=3 width=188) + Select Operator [SEL_5] (rows=3 width=96) Output:["_col0","_col1"] - Filter Operator [FIL_25] (rows=3 width=188) + Filter Operator [FIL_25] (rows=3 width=96) predicate:id is not null - TableScan [TS_3] (rows=3 width=188) - default@table2,table2,Tbl:COMPLETE,Col:NONE,Output:["id","val2"] + TableScan [TS_3] (rows=3 width=96) + default@table2,table2,Tbl:COMPLETE,Col:COMPLETE,Output:["id","val2"] PREHOOK: query: select table1.id, table1.val, table2.val2 from table1 inner join table2 on table1.val = 't1val01' and table1.id = table2.id left semi join table3 on table1.dimid = table3.id PREHOOK: type: QUERY @@ -186,44 +186,44 @@ Stage-0 Stage-1 Reducer 3 llap File Output Operator [FS_18] - Select Operator [SEL_17] (rows=5 width=232) + Select Operator [SEL_17] (rows=1 width=187) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_28] (rows=5 width=232) + Merge Join Operator [MERGEJOIN_28] (rows=1 width=96) Conds:RS_14._col0=RS_15._col0(Inner),Output:["_col0","_col4"] <-Map 5 [SIMPLE_EDGE] llap SHUFFLE [RS_15] PartitionCols:_col0 - Select Operator [SEL_8] (rows=3 width=188) + Select Operator [SEL_8] (rows=3 width=96) Output:["_col0","_col1"] - Filter Operator 
[FIL_26] (rows=3 width=188) + Filter Operator [FIL_26] (rows=3 width=96) predicate:id is not null - TableScan [TS_6] (rows=3 width=188) - default@table2,table2,Tbl:COMPLETE,Col:NONE,Output:["id","val2"] + TableScan [TS_6] (rows=3 width=96) + default@table2,table2,Tbl:COMPLETE,Col:COMPLETE,Output:["id","val2"] <-Reducer 2 [SIMPLE_EDGE] llap SHUFFLE [RS_14] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_27] (rows=5 width=211) + Merge Join Operator [MERGEJOIN_27] (rows=1 width=4) Conds:RS_11._col2=RS_12._col0(Left Semi),Output:["_col0"] <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_11] PartitionCols:_col2 - Select Operator [SEL_2] (rows=5 width=192) + Select Operator [SEL_2] (rows=2 width=99) Output:["_col0","_col2"] - Filter Operator [FIL_24] (rows=5 width=192) + Filter Operator [FIL_24] (rows=2 width=99) predicate:((val = 't1val01') and dimid is not null and id is not null) - TableScan [TS_0] (rows=10 width=192) - default@table1,table1,Tbl:COMPLETE,Col:NONE,Output:["id","val","dimid"] + TableScan [TS_0] (rows=10 width=99) + default@table1,table1,Tbl:COMPLETE,Col:COMPLETE,Output:["id","val","dimid"] <-Map 4 [SIMPLE_EDGE] llap SHUFFLE [RS_12] PartitionCols:_col0 - Group By Operator [GBY_10] (rows=5 width=4) + Group By Operator [GBY_10] (rows=2 width=4) Output:["_col0"],keys:_col0 Select Operator [SEL_5] (rows=5 width=4) Output:["_col0"] Filter Operator [FIL_25] (rows=5 width=4) predicate:id is not null TableScan [TS_3] (rows=5 width=4) - default@table3,table3,Tbl:COMPLETE,Col:NONE,Output:["id"] + default@table3,table3,Tbl:COMPLETE,Col:COMPLETE,Output:["id"] PREHOOK: query: select table1.id, table1.val, table2.val2 from table1 left semi join table3 on table1.dimid = table3.id inner join table2 on table1.val = 't1val01' and table1.id = table2.id PREHOOK: type: QUERY @@ -255,28 +255,28 @@ Stage-0 Stage-1 Reducer 2 llap File Output Operator [FS_12] - Merge Join Operator [MERGEJOIN_17] (rows=5 width=4) + Merge Join Operator [MERGEJOIN_17] (rows=1 width=185) Conds:RS_8.100, true=RS_9._col0, _col1(Left Semi),Output:["_col0","_col1","_col2"] <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_8] PartitionCols:100, true - Select Operator [SEL_2] (rows=1 width=372) + Select Operator [SEL_2] (rows=1 width=193) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_15] (rows=1 width=372) + Filter Operator [FIL_15] (rows=1 width=185) predicate:false - TableScan [TS_0] (rows=10 width=372) - default@table1,table1,Tbl:COMPLETE,Col:NONE,Output:["id","val","val1"] + TableScan [TS_0] (rows=10 width=185) + default@table1,table1,Tbl:COMPLETE,Col:COMPLETE,Output:["id","val","val1"] <-Map 3 [SIMPLE_EDGE] llap SHUFFLE [RS_9] PartitionCols:_col0, _col1 - Group By Operator [GBY_7] (rows=5 width=4) + Group By Operator [GBY_7] (rows=1 width=8) Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_5] (rows=5 width=4) + Select Operator [SEL_5] (rows=1 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_16] (rows=5 width=4) + Filter Operator [FIL_16] (rows=1 width=4) predicate:(id = 100) TableScan [TS_3] (rows=5 width=4) - default@table3,table3,Tbl:COMPLETE,Col:NONE,Output:["id"] + default@table3,table3,Tbl:COMPLETE,Col:COMPLETE,Output:["id"] PREHOOK: query: select table1.id, table1.val, table1.val1 from table1 left semi join table3 on table1.dimid = table3.id and table3.id = 100 where table1.dimid <> 100 PREHOOK: type: QUERY @@ -303,28 +303,28 @@ Stage-0 Stage-1 Reducer 2 llap File Output Operator [FS_12] - Merge Join Operator [MERGEJOIN_17] (rows=5 width=413) + Merge Join Operator [MERGEJOIN_17] (rows=3 width=185) 
Conds:RS_8.100, true=RS_9._col0, _col1(Left Semi),Output:["_col0","_col1","_col2"] <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_8] PartitionCols:100, true - Select Operator [SEL_2] (rows=5 width=376) + Select Operator [SEL_2] (rows=3 width=193) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_15] (rows=5 width=376) + Filter Operator [FIL_15] (rows=3 width=189) predicate:(dimid = 100) - TableScan [TS_0] (rows=10 width=376) - default@table1,table1,Tbl:COMPLETE,Col:NONE,Output:["id","val","val1","dimid"] + TableScan [TS_0] (rows=10 width=189) + default@table1,table1,Tbl:COMPLETE,Col:COMPLETE,Output:["id","val","val1","dimid"] <-Map 3 [SIMPLE_EDGE] llap SHUFFLE [RS_9] PartitionCols:_col0, _col1 - Group By Operator [GBY_7] (rows=5 width=4) + Group By Operator [GBY_7] (rows=1 width=8) Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_5] (rows=5 width=4) + Select Operator [SEL_5] (rows=1 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_16] (rows=5 width=4) + Filter Operator [FIL_16] (rows=1 width=4) predicate:(id = 100) TableScan [TS_3] (rows=5 width=4) - default@table3,table3,Tbl:COMPLETE,Col:NONE,Output:["id"] + default@table3,table3,Tbl:COMPLETE,Col:COMPLETE,Output:["id"] PREHOOK: query: select table1.id, table1.val, table1.val1 from table1 left semi join table3 on table1.dimid = table3.id and table3.id = 100 where table1.dimid IN (100,200) PREHOOK: type: QUERY @@ -353,28 +353,28 @@ Stage-0 Stage-1 Reducer 2 llap File Output Operator [FS_12] - Merge Join Operator [MERGEJOIN_17] (rows=5 width=4) + Merge Join Operator [MERGEJOIN_17] (rows=1 width=185) Conds:RS_8.100, true=RS_9._col0, _col1(Left Semi),Output:["_col0","_col1","_col2"] <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_8] PartitionCols:100, true - Select Operator [SEL_2] (rows=1 width=372) + Select Operator [SEL_2] (rows=1 width=193) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_15] (rows=1 width=372) + Filter Operator [FIL_15] (rows=1 width=185) predicate:false - TableScan [TS_0] (rows=10 width=372) - default@table1,table1,Tbl:COMPLETE,Col:NONE,Output:["id","val","val1"] + TableScan [TS_0] (rows=10 width=185) + default@table1,table1,Tbl:COMPLETE,Col:COMPLETE,Output:["id","val","val1"] <-Map 3 [SIMPLE_EDGE] llap SHUFFLE [RS_9] PartitionCols:_col0, _col1 - Group By Operator [GBY_7] (rows=5 width=4) + Group By Operator [GBY_7] (rows=1 width=8) Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_5] (rows=5 width=4) + Select Operator [SEL_5] (rows=1 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_16] (rows=5 width=4) + Filter Operator [FIL_16] (rows=1 width=4) predicate:(id = 100) TableScan [TS_3] (rows=5 width=4) - default@table3,table3,Tbl:COMPLETE,Col:NONE,Output:["id"] + default@table3,table3,Tbl:COMPLETE,Col:COMPLETE,Output:["id"] PREHOOK: query: select table1.id, table1.val, table1.val1 from table1 left semi join table3 on table1.dimid = table3.id and table3.id = 100 where table1.dimid = 200 PREHOOK: type: QUERY @@ -401,28 +401,28 @@ Stage-0 Stage-1 Reducer 2 llap File Output Operator [FS_12] - Merge Join Operator [MERGEJOIN_17] (rows=5 width=413) + Merge Join Operator [MERGEJOIN_17] (rows=3 width=185) Conds:RS_8.100, true=RS_9._col0, _col1(Left Semi),Output:["_col0","_col1","_col2"] <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_8] PartitionCols:100, true - Select Operator [SEL_2] (rows=5 width=376) + Select Operator [SEL_2] (rows=3 width=193) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_15] (rows=5 width=376) + Filter Operator [FIL_15] (rows=3 width=189) predicate:(dimid = 100) - 
TableScan [TS_0] (rows=10 width=376) - default@table1,table1,Tbl:COMPLETE,Col:NONE,Output:["id","val","val1","dimid"] + TableScan [TS_0] (rows=10 width=189) + default@table1,table1,Tbl:COMPLETE,Col:COMPLETE,Output:["id","val","val1","dimid"] <-Map 3 [SIMPLE_EDGE] llap SHUFFLE [RS_9] PartitionCols:_col0, _col1 - Group By Operator [GBY_7] (rows=5 width=4) + Group By Operator [GBY_7] (rows=1 width=8) Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_5] (rows=5 width=4) + Select Operator [SEL_5] (rows=1 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_16] (rows=5 width=4) + Filter Operator [FIL_16] (rows=1 width=4) predicate:(id = 100) TableScan [TS_3] (rows=5 width=4) - default@table3,table3,Tbl:COMPLETE,Col:NONE,Output:["id"] + default@table3,table3,Tbl:COMPLETE,Col:COMPLETE,Output:["id"] PREHOOK: query: select table1.id, table1.val, table1.val1 from table1 left semi join table3 on table1.dimid = table3.id and table3.id = 100 where table1.dimid = 100 PREHOOK: type: QUERY @@ -451,28 +451,28 @@ Stage-0 Stage-1 Reducer 2 llap File Output Operator [FS_12] - Merge Join Operator [MERGEJOIN_17] (rows=5 width=413) + Merge Join Operator [MERGEJOIN_17] (rows=3 width=185) Conds:RS_8.100, true=RS_9._col0, _col1(Left Semi),Output:["_col0","_col1","_col2"] <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_8] PartitionCols:100, true - Select Operator [SEL_2] (rows=5 width=376) + Select Operator [SEL_2] (rows=3 width=193) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_15] (rows=5 width=376) + Filter Operator [FIL_15] (rows=3 width=189) predicate:(dimid = 100) - TableScan [TS_0] (rows=10 width=376) - default@table1,table1,Tbl:COMPLETE,Col:NONE,Output:["id","val","val1","dimid"] + TableScan [TS_0] (rows=10 width=189) + default@table1,table1,Tbl:COMPLETE,Col:COMPLETE,Output:["id","val","val1","dimid"] <-Map 3 [SIMPLE_EDGE] llap SHUFFLE [RS_9] PartitionCols:_col0, _col1 - Group By Operator [GBY_7] (rows=5 width=4) + Group By Operator [GBY_7] (rows=1 width=8) Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_5] (rows=5 width=4) + Select Operator [SEL_5] (rows=1 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_16] (rows=5 width=4) + Filter Operator [FIL_16] (rows=1 width=4) predicate:(id = 100) TableScan [TS_3] (rows=5 width=4) - default@table3,table3,Tbl:COMPLETE,Col:NONE,Output:["id"] + default@table3,table3,Tbl:COMPLETE,Col:COMPLETE,Output:["id"] PREHOOK: query: select table1.id, table1.val, table1.val1 from table1 left semi join table3 on table1.dimid = table3.id and table3.id = 100 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/llap/cross_prod_3.q.out ql/src/test/results/clientpositive/llap/cross_prod_3.q.out index 94fe942131..76200c69f3 100644 --- ql/src/test/results/clientpositive/llap/cross_prod_3.q.out +++ ql/src/test/results/clientpositive/llap/cross_prod_3.q.out @@ -50,14 +50,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 
Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -70,14 +70,14 @@ STAGE PLANS: input vertices: 1 Map 2 2 Map 3 - Statistics: Num rows: 22 Data size: 8096 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 3916 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col3 (type: string), _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 22 Data size: 8096 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 3916 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 22 Data size: 8096 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 3916 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -108,19 +108,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs diff --git ql/src/test/results/clientpositive/llap/cte_5.q.out ql/src/test/results/clientpositive/llap/cte_5.q.out index dad6cd8a70..7823f1c7d6 100644 --- ql/src/test/results/clientpositive/llap/cte_5.q.out +++ ql/src/test/results/clientpositive/llap/cte_5.q.out @@ -85,9 +85,9 @@ Stage-0 Stage-1 Reducer 2 llap File Output Operator [FS_10] - Select Operator [SEL_9] (rows=2 width=90) + Select Operator [SEL_9] (rows=2 width=4) Output:["_col0"] - Merge Join Operator [MERGEJOIN_13] (rows=2 width=90) + Merge Join Operator [MERGEJOIN_13] (rows=2 width=8) Conds:(Inner) <-Map 1 [XPROD_EDGE] llap XPROD_EDGE [RS_6] @@ -95,7 +95,7 @@ Stage-0 Filter Operator [FIL_11] (rows=1 width=4) predicate:(UDFToDouble(colnum) = 5.0) TableScan [TS_0] (rows=1 width=4) - mydb@q1,a,Tbl:COMPLETE,Col:NONE,Output:["colnum"] + mydb@q1,a,Tbl:COMPLETE,Col:COMPLETE,Output:["colnum"] <-Map 3 [XPROD_EDGE] llap XPROD_EDGE [RS_7] Select Operator [SEL_5] (rows=2 width=85) diff --git ql/src/test/results/clientpositive/llap/cte_mat_5.q.out ql/src/test/results/clientpositive/llap/cte_mat_5.q.out index 92d8601ee0..9121f49cd4 100644 --- ql/src/test/results/clientpositive/llap/cte_mat_5.q.out +++ ql/src/test/results/clientpositive/llap/cte_mat_5.q.out @@ -94,7 +94,7 @@ Stage-3 Filter Operator [FIL_18] (rows=1 width=4) predicate:colnum is not null TableScan [TS_5] 
(rows=1 width=4) - mydb@q1,a,Tbl:COMPLETE,Col:NONE,Output:["colnum"] + mydb@q1,a,Tbl:COMPLETE,Col:COMPLETE,Output:["colnum"] <-Map 4 [SIMPLE_EDGE] llap SHUFFLE [RS_12] PartitionCols:UDFToDouble(_col0) diff --git ql/src/test/results/clientpositive/llap/deleteAnalyze.q.out ql/src/test/results/clientpositive/llap/deleteAnalyze.q.out index d2552fe867..253d5b7472 100644 --- ql/src/test/results/clientpositive/llap/deleteAnalyze.q.out +++ ql/src/test/results/clientpositive/llap/deleteAnalyze.q.out @@ -47,7 +47,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"amount\":\"true\",\"id\":\"true\",\"item\":\"true\",\"sales_tax\":\"true\"}} numFiles 1 numRows 2 rawDataSize 634 @@ -73,17 +73,17 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@testdeci2 col_name amount data_type decimal(10,3) -min -max -num_nulls -distinct_count +min 12.123 +max 123.123 +num_nulls 0 +distinct_count 2 avg_col_len max_col_len num_trues num_falses -bitVector +bitVector HL comment from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"amount\":\"true\",\"id\":\"true\",\"item\":\"true\",\"sales_tax\":\"true\"}} PREHOOK: query: analyze table testdeci2 compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@testdeci2 diff --git ql/src/test/results/clientpositive/llap/disable_merge_for_bucketing.q.out ql/src/test/results/clientpositive/llap/disable_merge_for_bucketing.q.out index acd607628a..e45d49e602 100644 --- ql/src/test/results/clientpositive/llap/disable_merge_for_bucketing.q.out +++ ql/src/test/results/clientpositive/llap/disable_merge_for_bucketing.q.out @@ -140,6 +140,41 @@ STAGE PLANS: TotalFiles: 2 GatherStats: true MultiFileSpray: true + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -177,6 +212,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.bucket2_1 + 
Is Table Level Stats: true PREHOOK: query: insert overwrite table bucket2_1 select * from src @@ -212,18 +252,18 @@ STAGE PLANS: Map Operator Tree: TableScan alias: s - Statistics: Num rows: 500 Data size: 89488 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (((hash(key) & 2147483647) % 2) = 0) (type: boolean) - Statistics: Num rows: 250 Data size: 44744 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 44744 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + - Statistics: Num rows: 250 Data size: 44744 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -233,10 +273,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 44744 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 250 Data size: 44744 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/dp_counter_mm.q.out ql/src/test/results/clientpositive/llap/dp_counter_mm.q.out index 662c02a778..f73278c25c 100644 --- ql/src/test/results/clientpositive/llap/dp_counter_mm.q.out +++ ql/src/test/results/clientpositive/llap/dp_counter_mm.q.out @@ -17,10 +17,12 @@ PREHOOK: Output: default@src2 Stage-1 FILE SYSTEM COUNTERS: Stage-1 HIVE COUNTERS: CREATED_DYNAMIC_PARTITIONS: 57 - CREATED_FILES: 57 + CREATED_FILES: 61 DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 500 + RECORDS_OUT_0: 57 RECORDS_OUT_1_default.src2: 84 + RECORDS_OUT_INTERMEDIATE_Map_1: 57 PREHOOK: query: insert into table src2 partition (value) select * from src where key < 200 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -28,10 +30,12 @@ PREHOOK: Output: default@src2 Stage-1 FILE SYSTEM COUNTERS: Stage-1 HIVE COUNTERS: CREATED_DYNAMIC_PARTITIONS: 64 - CREATED_FILES: 121 + CREATED_FILES: 125 DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 500 + RECORDS_OUT_0: 121 RECORDS_OUT_1_default.src2: 189 + RECORDS_OUT_INTERMEDIATE_Map_1: 121 PREHOOK: query: drop table src2 PREHOOK: type: DROPTABLE PREHOOK: Input: default@src2 @@ -47,10 +51,12 @@ PREHOOK: Output: default@src2 Stage-1 FILE SYSTEM COUNTERS: Stage-1 HIVE COUNTERS: CREATED_DYNAMIC_PARTITIONS: 121 - CREATED_FILES: 121 + CREATED_FILES: 125 DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 500 + RECORDS_OUT_0: 121 RECORDS_OUT_1_default.src2: 189 + RECORDS_OUT_INTERMEDIATE_Map_1: 121 PREHOOK: query: insert into table src2 partition (value) select * from src where key < 300 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -58,10 +64,12 @@ PREHOOK: Output: default@src2 Stage-1 FILE SYSTEM 
COUNTERS: Stage-1 HIVE COUNTERS: CREATED_DYNAMIC_PARTITIONS: 63 - CREATED_FILES: 184 + CREATED_FILES: 188 DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 500 + RECORDS_OUT_0: 184 RECORDS_OUT_1_default.src2: 292 + RECORDS_OUT_INTERMEDIATE_Map_1: 184 PREHOOK: query: drop table src2 PREHOOK: type: DROPTABLE PREHOOK: Input: default@src2 @@ -86,11 +94,13 @@ PREHOOK: Output: default@src3 Stage-2 FILE SYSTEM COUNTERS: Stage-2 HIVE COUNTERS: CREATED_DYNAMIC_PARTITIONS: 121 - CREATED_FILES: 121 + CREATED_FILES: 129 DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 500 + RECORDS_OUT_0: 121 RECORDS_OUT_1_default.src2: 84 RECORDS_OUT_2_default.src3: 105 + RECORDS_OUT_INTERMEDIATE_Map_1: 121 PREHOOK: query: from src insert into table src2 partition (value) select * where key < 100 insert into table src3 partition (value) select * where key >= 100 and key < 300 @@ -101,11 +111,13 @@ PREHOOK: Output: default@src3 Stage-2 FILE SYSTEM COUNTERS: Stage-2 HIVE COUNTERS: CREATED_DYNAMIC_PARTITIONS: 63 - CREATED_FILES: 184 + CREATED_FILES: 192 DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 500 + RECORDS_OUT_0: 184 RECORDS_OUT_1_default.src2: 84 RECORDS_OUT_2_default.src3: 208 + RECORDS_OUT_INTERMEDIATE_Map_1: 184 PREHOOK: query: drop table src2 PREHOOK: type: DROPTABLE PREHOOK: Input: default@src2 @@ -125,11 +137,14 @@ PREHOOK: Output: default@src2 Stage-1 FILE SYSTEM COUNTERS: Stage-1 HIVE COUNTERS: CREATED_DYNAMIC_PARTITIONS: 121 - CREATED_FILES: 121 + CREATED_FILES: 125 DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 500 - RECORDS_IN_Map_3: 500 + RECORDS_IN_Map_4: 500 + RECORDS_OUT_0: 121 RECORDS_OUT_1_default.src2: 189 + RECORDS_OUT_INTERMEDIATE_Map_1: 57 + RECORDS_OUT_INTERMEDIATE_Map_4: 64 PREHOOK: query: insert into table src2 partition (value) select temps.* from ( select * from src where key < 100 @@ -141,8 +156,11 @@ PREHOOK: Output: default@src2 Stage-1 FILE SYSTEM COUNTERS: Stage-1 HIVE COUNTERS: CREATED_DYNAMIC_PARTITIONS: 63 - CREATED_FILES: 184 + CREATED_FILES: 188 DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 500 - RECORDS_IN_Map_3: 500 + RECORDS_IN_Map_4: 500 + RECORDS_OUT_0: 184 RECORDS_OUT_1_default.src2: 292 + RECORDS_OUT_INTERMEDIATE_Map_1: 57 + RECORDS_OUT_INTERMEDIATE_Map_4: 127 diff --git ql/src/test/results/clientpositive/llap/dp_counter_non_mm.q.out ql/src/test/results/clientpositive/llap/dp_counter_non_mm.q.out index 85f9b9d7a3..e2ff9a6b35 100644 --- ql/src/test/results/clientpositive/llap/dp_counter_non_mm.q.out +++ ql/src/test/results/clientpositive/llap/dp_counter_non_mm.q.out @@ -17,10 +17,12 @@ PREHOOK: Output: default@src2 Stage-1 FILE SYSTEM COUNTERS: Stage-1 HIVE COUNTERS: CREATED_DYNAMIC_PARTITIONS: 57 - CREATED_FILES: 57 + CREATED_FILES: 61 DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 500 + RECORDS_OUT_0: 57 RECORDS_OUT_1_default.src2: 84 + RECORDS_OUT_INTERMEDIATE_Map_1: 57 PREHOOK: query: insert into table src2 partition (value) select * from src where key < 200 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -28,10 +30,12 @@ PREHOOK: Output: default@src2 Stage-1 FILE SYSTEM COUNTERS: Stage-1 HIVE COUNTERS: CREATED_DYNAMIC_PARTITIONS: 64 - CREATED_FILES: 121 + CREATED_FILES: 125 DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 500 + RECORDS_OUT_0: 121 RECORDS_OUT_1_default.src2: 189 + RECORDS_OUT_INTERMEDIATE_Map_1: 121 PREHOOK: query: drop table src2 PREHOOK: type: DROPTABLE PREHOOK: Input: default@src2 @@ -47,10 +51,12 @@ PREHOOK: Output: default@src2 Stage-1 FILE SYSTEM COUNTERS: Stage-1 HIVE COUNTERS: CREATED_DYNAMIC_PARTITIONS: 121 - CREATED_FILES: 121 + CREATED_FILES: 125 DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 500 
+ RECORDS_OUT_0: 121 RECORDS_OUT_1_default.src2: 189 + RECORDS_OUT_INTERMEDIATE_Map_1: 121 PREHOOK: query: insert into table src2 partition (value) select * from src where key < 300 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -58,10 +64,12 @@ PREHOOK: Output: default@src2 Stage-1 FILE SYSTEM COUNTERS: Stage-1 HIVE COUNTERS: CREATED_DYNAMIC_PARTITIONS: 63 - CREATED_FILES: 184 + CREATED_FILES: 188 DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 500 + RECORDS_OUT_0: 184 RECORDS_OUT_1_default.src2: 292 + RECORDS_OUT_INTERMEDIATE_Map_1: 184 PREHOOK: query: drop table src2 PREHOOK: type: DROPTABLE PREHOOK: Input: default@src2 @@ -86,11 +94,13 @@ PREHOOK: Output: default@src3 Stage-2 FILE SYSTEM COUNTERS: Stage-2 HIVE COUNTERS: CREATED_DYNAMIC_PARTITIONS: 121 - CREATED_FILES: 121 + CREATED_FILES: 129 DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 500 + RECORDS_OUT_0: 121 RECORDS_OUT_1_default.src2: 84 RECORDS_OUT_2_default.src3: 105 + RECORDS_OUT_INTERMEDIATE_Map_1: 121 PREHOOK: query: from src insert into table src2 partition (value) select * where key < 100 insert into table src3 partition (value) select * where key >= 100 and key < 300 @@ -101,11 +111,13 @@ PREHOOK: Output: default@src3 Stage-2 FILE SYSTEM COUNTERS: Stage-2 HIVE COUNTERS: CREATED_DYNAMIC_PARTITIONS: 63 - CREATED_FILES: 184 + CREATED_FILES: 192 DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 500 + RECORDS_OUT_0: 184 RECORDS_OUT_1_default.src2: 84 RECORDS_OUT_2_default.src3: 208 + RECORDS_OUT_INTERMEDIATE_Map_1: 184 PREHOOK: query: drop table src2 PREHOOK: type: DROPTABLE PREHOOK: Input: default@src2 @@ -125,11 +137,14 @@ PREHOOK: Output: default@src2 Stage-1 FILE SYSTEM COUNTERS: Stage-1 HIVE COUNTERS: CREATED_DYNAMIC_PARTITIONS: 121 - CREATED_FILES: 121 + CREATED_FILES: 125 DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 500 - RECORDS_IN_Map_3: 500 + RECORDS_IN_Map_4: 500 + RECORDS_OUT_0: 121 RECORDS_OUT_1_default.src2: 189 + RECORDS_OUT_INTERMEDIATE_Map_1: 57 + RECORDS_OUT_INTERMEDIATE_Map_4: 64 PREHOOK: query: insert into table src2 partition (value) select temps.* from ( select * from src where key < 100 @@ -141,8 +156,11 @@ PREHOOK: Output: default@src2 Stage-1 FILE SYSTEM COUNTERS: Stage-1 HIVE COUNTERS: CREATED_DYNAMIC_PARTITIONS: 63 - CREATED_FILES: 184 + CREATED_FILES: 188 DESERIALIZE_ERRORS: 0 RECORDS_IN_Map_1: 500 - RECORDS_IN_Map_3: 500 + RECORDS_IN_Map_4: 500 + RECORDS_OUT_0: 184 RECORDS_OUT_1_default.src2: 292 + RECORDS_OUT_INTERMEDIATE_Map_1: 57 + RECORDS_OUT_INTERMEDIATE_Map_4: 127 diff --git ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_3.q.out ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_3.q.out index 6772e5dd58..91557e8cc5 100644 --- ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_3.q.out +++ ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_3.q.out @@ -41,13 +41,14 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Reducer 8 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) + Map 1 <- Reducer 9 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 2 (SIMPLE_EDGE) Reducer 5 <- Reducer 2 (SIMPLE_EDGE) Reducer 6 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) + Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -66,7 +67,7 @@ STAGE PLANS: value expressions: ROW__ID (type: struct) Execution mode: llap LLAP IO: may be 
used (ACID table) - Map 7 + Map 8 Map Operator Tree: TableScan alias: s @@ -215,6 +216,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.merge_tmp_table + Select Operator + expressions: _col0 (type: int) + outputColumnNames: val + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(val, 'hll') + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 432 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 432 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 432 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: llap Reduce Operator Tree: @@ -231,7 +252,35 @@ STAGE PLANS: serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.acidtbl Write Type: INSERT - Reducer 8 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: a, b + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 9 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -290,6 +339,10 @@ STAGE PLANS: Stage: Stage-8 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: val + Column Types: int + Table: default.merge_tmp_table Stage: Stage-1 Move Operator @@ -305,6 +358,10 @@ STAGE PLANS: Stage: Stage-9 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: a, b + Column Types: int, int + Table: default.acidtbl PREHOOK: query: explain merge into acidTbl as t using nonAcidOrcTbl s ON t.a = s.a WHEN NOT MATCHED THEN INSERT VALUES(s.a, s.b) @@ -323,10 +380,11 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Reducer 5 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Map 1 <- Reducer 6 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 
@@ -344,7 +402,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: may be used (ACID table) - Map 4 + Map 5 Map Operator Tree: TableScan alias: s @@ -409,7 +467,35 @@ STAGE PLANS: serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.acidtbl Write Type: INSERT - Reducer 5 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: a, b + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -439,6 +525,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: a, b + Column Types: int, int + Table: default.acidtbl PREHOOK: query: explain merge into acidTbl as t using ( select * from nonAcidOrcTbl where a > 0 @@ -476,14 +566,15 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Map 1 <- Union 2 (CONTAINS) - Map 10 <- Reducer 8 (BROADCAST_EDGE) - Map 9 <- Union 2 (CONTAINS) - Reducer 3 <- Map 10 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE) + Map 10 <- Union 2 (CONTAINS) + Map 11 <- Reducer 9 (BROADCAST_EDGE) + Reducer 3 <- Map 11 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 3 (SIMPLE_EDGE) Reducer 6 <- Reducer 3 (SIMPLE_EDGE) Reducer 7 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 8 <- Union 2 (CUSTOM_SIMPLE_EDGE) + Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE) + Reducer 9 <- Union 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -523,22 +614,6 @@ STAGE PLANS: Map 10 Map Operator Tree: TableScan - alias: t - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (a BETWEEN DynamicValue(RS_10_nonacidorctbl__col0_min) AND DynamicValue(RS_10_nonacidorctbl__col0_max) and in_bloom_filter(a, DynamicValue(RS_10_nonacidorctbl__col0_bloom_filter))) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: a (type: int) - sort order: + - Map-reduce partition columns: a (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - value expressions: ROW__ID (type: struct) - Execution mode: llap - LLAP IO: may be used (ACID table) - Map 9 - Map Operator Tree: - TableScan alias: nonacidorctbl filterExpr: (b > 0) (type: boolean) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE @@ -570,6 
+645,22 @@ STAGE PLANS: value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Execution mode: llap LLAP IO: all inputs + Map 11 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (a BETWEEN DynamicValue(RS_10_nonacidorctbl__col0_min) AND DynamicValue(RS_10_nonacidorctbl__col0_max) and in_bloom_filter(a, DynamicValue(RS_10_nonacidorctbl__col0_bloom_filter))) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: a (type: int) + sort order: + + Map-reduce partition columns: a (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + value expressions: ROW__ID (type: struct) + Execution mode: llap + LLAP IO: may be used (ACID table) Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -693,6 +784,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.merge_tmp_table + Select Operator + expressions: _col0 (type: int) + outputColumnNames: val + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(val, 'hll') + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 432 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 432 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 432 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 7 Execution mode: llap Reduce Operator Tree: @@ -709,10 +820,38 @@ STAGE PLANS: serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.acidtbl Write Type: INSERT + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: a, b + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 8 Execution mode: llap Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 9 + Execution mode: llap + Reduce Operator Tree: + Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=2) mode: final outputColumnNames: _col0, 
_col1, _col2 @@ -770,6 +909,10 @@ STAGE PLANS: Stage: Stage-8 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: val + Column Types: int + Table: default.merge_tmp_table Stage: Stage-1 Move Operator @@ -785,6 +928,10 @@ STAGE PLANS: Stage: Stage-9 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: a, b + Column Types: int, int + Table: default.acidtbl PREHOOK: query: drop database if exists type2_scd_helper cascade PREHOOK: type: DROPDATABASE diff --git ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_sw.q.out ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_sw.q.out index d6103a8fb3..81b6bded11 100644 --- ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_sw.q.out +++ ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_sw.q.out @@ -319,49 +319,49 @@ STAGE PLANS: TableScan alias: srcpart_small filterExpr: key1 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 184 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 87 Basic stats: PARTIAL Column stats: COMPLETE Filter Operator predicate: key1 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 184 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 87 Basic stats: PARTIAL Column stats: COMPLETE Select Operator expressions: key1 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 87 Basic stats: PARTIAL Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 184 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 87 Basic stats: PARTIAL Column stats: COMPLETE Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 87 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 736 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 639 Basic stats: PARTIAL Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 736 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 639 Basic stats: PARTIAL Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 184 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 87 Basic stats: PARTIAL Column stats: COMPLETE Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 87 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 736 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 639 Basic stats: PARTIAL Column stats: COMPLETE Reduce Output 
Operator sort order: - Statistics: Num rows: 1 Data size: 736 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 639 Basic stats: PARTIAL Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Execution mode: llap LLAP IO: all inputs @@ -445,14 +445,14 @@ STAGE PLANS: aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1) mode: final outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 736 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 639 Basic stats: PARTIAL Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 736 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 639 Basic stats: PARTIAL Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 736 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 639 Basic stats: PARTIAL Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Reducer 8 Execution mode: llap @@ -476,14 +476,14 @@ STAGE PLANS: aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1) mode: final outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 736 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 639 Basic stats: PARTIAL Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 736 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 639 Basic stats: PARTIAL Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 736 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 639 Basic stats: PARTIAL Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Stage: Stage-0 diff --git ql/src/test/results/clientpositive/llap/dynpart_sort_opt_vectorization.q.out ql/src/test/results/clientpositive/llap/dynpart_sort_opt_vectorization.q.out index ecc467a08e..15a97a78c1 100644 --- ql/src/test/results/clientpositive/llap/dynpart_sort_opt_vectorization.q.out +++ ql/src/test/results/clientpositive/llap/dynpart_sort_opt_vectorization.q.out @@ -173,19 +173,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: over1k_orc - Statistics: Num rows: 1049 Data size: 23952 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((t = 27) or t is null) (type: boolean) - Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col4 (type: tinyint), _col0 (type: smallint) sort order: ++ Map-reduce partition columns: _col4 (type: tinyint) - 
Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int), _col2 (type: bigint), _col3 (type: float) Execution mode: vectorized, llap LLAP IO: all inputs @@ -195,11 +195,11 @@ STAGE PLANS: Select Operator expressions: KEY._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false Dp Sort State: PARTITION_SORTED - Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -225,6 +225,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part_orc PREHOOK: query: explain insert overwrite table over1k_part_limit_orc partition(ds="foo", t) select si,i,b,f,t from over1k_orc where t is null or t=27 limit 10 PREHOOK: type: QUERY @@ -249,20 +253,20 @@ STAGE PLANS: Map Operator Tree: TableScan alias: over1k_orc - Statistics: Num rows: 1049 Data size: 23952 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((t = 27) or t is null) (type: boolean) - Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 220 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 10 Data size: 220 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) Execution mode: vectorized, llap @@ -273,15 +277,15 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), VALUE._col4 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 10 Data size: 220 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 220 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key 
expressions: _col4 (type: tinyint) sort order: + Map-reduce partition columns: _col4 (type: tinyint) - Statistics: Num rows: 10 Data size: 220 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float) Reducer 3 Execution mode: vectorized, llap @@ -289,11 +293,11 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 10 Data size: 220 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false Dp Sort State: PARTITION_SORTED - Statistics: Num rows: 10 Data size: 220 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -319,6 +323,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part_limit_orc PREHOOK: query: explain insert overwrite table over1k_part_buck_orc partition(t) select si,i,b,f,t from over1k_orc where t is null or t=27 PREHOOK: type: QUERY @@ -342,19 +350,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: over1k_orc - Statistics: Num rows: 1049 Data size: 23952 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((t = 27) or t is null) (type: boolean) - Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col4 (type: tinyint), '_bucket_number' (type: string) sort order: ++ Map-reduce partition columns: _col4 (type: tinyint) - Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float) Execution mode: vectorized, llap LLAP IO: all inputs @@ -364,11 +372,11 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint), KEY.'_bucket_number' (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, '_bucket_number' - Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 1342 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false Dp Sort State: PARTITION_BUCKET_SORTED - Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE + 
Statistics: Num rows: 11 Data size: 1342 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -393,6 +401,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part_buck_orc PREHOOK: query: explain insert overwrite table over1k_part_buck_sort_orc partition(t) select si,i,b,f,t from over1k_orc where t is null or t=27 PREHOOK: type: QUERY @@ -416,19 +428,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: over1k_orc - Statistics: Num rows: 1049 Data size: 23952 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((t = 27) or t is null) (type: boolean) - Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col4 (type: tinyint), '_bucket_number' (type: string), _col3 (type: float) sort order: +++ Map-reduce partition columns: _col4 (type: tinyint) - Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs @@ -438,11 +450,11 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), KEY._col3 (type: float), KEY._col4 (type: tinyint), KEY.'_bucket_number' (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, '_bucket_number' - Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 1342 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false Dp Sort State: PARTITION_BUCKET_SORTED - Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 1342 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -467,6 +479,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part_buck_sort_orc PREHOOK: query: insert overwrite table over1k_part_orc partition(ds="foo", t) select si,i,b,f,t from over1k_orc where t is null or t=27 order by si PREHOOK: type: QUERY @@ -558,19 +574,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: over1k_orc - Statistics: Num rows: 1049 Data size: 23952 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((t = 27) or t is null) (type: boolean) - Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE 
Column stats: NONE + Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col4 (type: tinyint), _col0 (type: smallint) sort order: ++ Map-reduce partition columns: _col4 (type: tinyint) - Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int), _col2 (type: bigint), _col3 (type: float) Execution mode: vectorized, llap LLAP IO: all inputs @@ -580,11 +596,11 @@ STAGE PLANS: Select Operator expressions: KEY._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false Dp Sort State: PARTITION_SORTED - Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -610,6 +626,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part_orc PREHOOK: query: explain insert into table over1k_part_limit_orc partition(ds="foo", t) select si,i,b,f,t from over1k_orc where t is null or t=27 limit 10 PREHOOK: type: QUERY @@ -634,20 +654,20 @@ STAGE PLANS: Map Operator Tree: TableScan alias: over1k_orc - Statistics: Num rows: 1049 Data size: 23952 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((t = 27) or t is null) (type: boolean) - Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 220 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 10 Data size: 220 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) Execution mode: vectorized, llap @@ -658,15 +678,15 @@ STAGE PLANS: Select 
Operator expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), VALUE._col4 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 10 Data size: 220 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 220 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col4 (type: tinyint) sort order: + Map-reduce partition columns: _col4 (type: tinyint) - Statistics: Num rows: 10 Data size: 220 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float) Reducer 3 Execution mode: vectorized, llap @@ -674,11 +694,11 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 10 Data size: 220 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false Dp Sort State: PARTITION_SORTED - Statistics: Num rows: 10 Data size: 220 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -704,6 +724,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part_limit_orc PREHOOK: query: explain insert into table over1k_part_buck_orc partition(t) select si,i,b,f,t from over1k_orc where t is null or t=27 PREHOOK: type: QUERY @@ -727,19 +751,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: over1k_orc - Statistics: Num rows: 1049 Data size: 23952 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((t = 27) or t is null) (type: boolean) - Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col4 (type: tinyint), '_bucket_number' (type: string) sort order: ++ Map-reduce partition columns: _col4 (type: tinyint) - Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float) Execution mode: vectorized, llap LLAP IO: all inputs @@ -749,11 +773,11 @@ STAGE PLANS: Select 
Operator expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint), KEY.'_bucket_number' (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, '_bucket_number' - Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 1342 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false Dp Sort State: PARTITION_BUCKET_SORTED - Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 1342 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -778,6 +802,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part_buck_orc PREHOOK: query: explain insert into table over1k_part_buck_sort_orc partition(t) select si,i,b,f,t from over1k_orc where t is null or t=27 PREHOOK: type: QUERY @@ -801,19 +829,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: over1k_orc - Statistics: Num rows: 1049 Data size: 23952 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((t = 27) or t is null) (type: boolean) - Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col4 (type: tinyint), '_bucket_number' (type: string), _col3 (type: float) sort order: +++ Map-reduce partition columns: _col4 (type: tinyint) - Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs @@ -823,11 +851,11 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), KEY._col3 (type: float), KEY._col4 (type: tinyint), KEY.'_bucket_number' (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, '_bucket_number' - Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 1342 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false Dp Sort State: PARTITION_BUCKET_SORTED - Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 1342 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -852,6 +880,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: 
default.over1k_part_buck_sort_orc PREHOOK: query: insert into table over1k_part_orc partition(ds="foo", t) select si,i,b,f,t from over1k_orc where t is null or t=27 order by si PREHOOK: type: QUERY @@ -944,7 +976,7 @@ Database: default Table: over1k_part_orc #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 2 numRows 32 rawDataSize 640 @@ -984,7 +1016,7 @@ Database: default Table: over1k_part_orc #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 2 numRows 6 rawDataSize 120 @@ -1024,7 +1056,7 @@ Database: default Table: over1k_part_limit_orc #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 2 numRows 14 rawDataSize 280 @@ -1064,7 +1096,7 @@ Database: default Table: over1k_part_limit_orc #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 2 numRows 6 rawDataSize 120 @@ -1103,7 +1135,7 @@ Database: default Table: over1k_part_buck_orc #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 2 numRows 32 rawDataSize 640 @@ -1142,7 +1174,7 @@ Database: default Table: over1k_part_buck_orc #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 2 numRows 4 rawDataSize 80 @@ -1181,7 +1213,7 @@ Database: default Table: over1k_part_buck_sort_orc #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 2 numRows 32 rawDataSize 640 @@ -1220,7 +1252,7 @@ Database: default Table: over1k_part_buck_sort_orc #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 2 numRows 4 rawDataSize 80 @@ -1323,42 +1355,79 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: over1k_orc - Statistics: Num rows: 1049 Data size: 23952 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((t = 27) or t is null) (type: boolean) - Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE + Statistics: 
Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + - Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: smallint), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 - Execution mode: vectorized, llap + Execution mode: llap Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: smallint), KEY.reducesinkkey0 (type: int), VALUE._col1 (type: bigint), VALUE._col2 (type: float), VALUE._col3 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.over1k_part2_orc + Select Operator + expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), 'foo' (type: string), _col4 (type: tinyint) + outputColumnNames: si, i, b, f, ds, t + Statistics: Num rows: 11 Data size: 1221 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + keys: ds (type: string), t (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 2 Data size: 3574 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: tinyint) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) + Statistics: Num rows: 2 Data size: 3574 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 2 Data size: 3702 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 2 Data size: 3702 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + 
Statistics: Num rows: 2 Data size: 3702 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1379,6 +1448,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part2_orc PREHOOK: query: explain insert overwrite table over1k_part2_orc partition(ds="foo",t) select si,i,b,f,t from over1k_orc where t is null or t=27 order by i PREHOOK: type: QUERY @@ -1402,19 +1475,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: over1k_orc - Statistics: Num rows: 1049 Data size: 23952 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((t = 27) or t is null) (type: boolean) - Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col4 (type: tinyint), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col4 (type: tinyint) - Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: smallint), _col2 (type: bigint), _col3 (type: float) Execution mode: vectorized, llap LLAP IO: all inputs @@ -1424,11 +1497,11 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: smallint), KEY._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false Dp Sort State: PARTITION_SORTED - Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1454,6 +1527,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part2_orc PREHOOK: query: explain insert overwrite table over1k_part2_orc partition(ds="foo",t) select si,i,b,f,t from (select * from over1k_orc order by i limit 10) tmp where t is null or t=27 PREHOOK: type: QUERY @@ -1478,15 +1555,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: over1k_orc - Statistics: Num rows: 1049 Data size: 23952 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), si (type: smallint), i 
(type: int), b (type: bigint), f (type: float) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1049 Data size: 23952 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: int) sort order: + - Statistics: Num rows: 1049 Data size: 23952 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float) Execution mode: vectorized, llap @@ -1497,22 +1574,22 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), KEY.reducesinkkey0 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1049 Data size: 23952 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 220 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((_col0 = 27) or _col0 is null) (type: boolean) - Statistics: Num rows: 10 Data size: 220 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col0 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 10 Data size: 220 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col4 (type: tinyint) sort order: + Map-reduce partition columns: _col4 (type: tinyint) - Statistics: Num rows: 10 Data size: 220 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float) Reducer 3 Execution mode: vectorized, llap @@ -1520,11 +1597,11 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 10 Data size: 220 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false Dp Sort State: PARTITION_SORTED - Statistics: Num rows: 10 Data size: 220 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1550,6 +1627,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part2_orc PREHOOK: query: explain insert overwrite table over1k_part2_orc partition(ds="foo",t) select si,i,b,f,t from over1k_orc where t is null or t=27 group by si,i,b,f,t 
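(The Reducer 3 / compute_stats(..., 'hll') blocks that these plans gain are the fragments hive.stats.column.autogather injects: a follow-up aggregation that derives per-partition column statistics from the rows just written. For reference, the manual equivalent that previously had to be run after each load is a separate ANALYZE statement; a minimal HiveQL sketch, assuming the over1k_part2_orc table defined in this test:

    -- hand-run equivalent of what autogather now does inside the INSERT's own DAG
    ANALYZE TABLE over1k_part2_orc PARTITION(ds, t) COMPUTE STATISTICS FOR COLUMNS;

With autogather on, that extra pass is folded into the INSERT query plan itself, which is why the plans above grow an additional reducer.)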
PREHOOK: type: QUERY @@ -1567,48 +1648,85 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: over1k_orc - Statistics: Num rows: 1049 Data size: 23952 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((t = 27) or t is null) (type: boolean) - Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), f (type: float) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float) sort order: +++++ Map-reduce partition columns: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float) - Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 - Execution mode: vectorized, llap + Execution mode: llap Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: tinyint), KEY._col1 (type: smallint), KEY._col2 (type: int), KEY._col3 (type: bigint), KEY._col4 (type: float) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 28 Data size: 639 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col0 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 28 Data size: 639 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 28 Data size: 639 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.over1k_part2_orc + Select Operator + expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), 'foo' (type: string), _col4 (type: tinyint) + outputColumnNames: si, i, b, f, ds, t + Statistics: Num rows: 5 Data size: 555 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + keys: ds (type: string), t (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 2 Data size: 3574 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: 
tinyint) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) + Statistics: Num rows: 2 Data size: 3574 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 2 Data size: 3702 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 2 Data size: 3702 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 3702 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1629,6 +1747,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part2_orc PREHOOK: query: explain insert overwrite table over1k_part2_orc partition(ds="foo",t) select si,i,b,f,t from over1k_orc where t is null or t=27 group by si,i,b,f,t PREHOOK: type: QUERY @@ -1652,20 +1774,20 @@ STAGE PLANS: Map Operator Tree: TableScan alias: over1k_orc - Statistics: Num rows: 1049 Data size: 23952 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((t = 27) or t is null) (type: boolean) - Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), f (type: float) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float) sort order: +++++ Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -1675,15 +1797,15 @@ STAGE PLANS: keys: KEY._col0 (type: tinyint), KEY._col1 (type: smallint), KEY._col2 (type: int), KEY._col3 (type: bigint), KEY._col4 (type: float) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 28 Data size: 639 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 
_col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col0 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 28 Data size: 639 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false Dp Sort State: PARTITION_SORTED - Statistics: Num rows: 28 Data size: 639 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1709,6 +1831,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part2_orc PREHOOK: query: insert overwrite table over1k_part2_orc partition(ds="foo",t) select si,i,b,f,t from over1k_orc where t is null or t=27 order by i PREHOOK: type: QUERY @@ -1750,7 +1876,7 @@ Database: default Table: over1k_part2_orc #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 1 numRows 16 rawDataSize 415 @@ -1790,7 +1916,7 @@ Database: default Table: over1k_part2_orc #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 1 numRows 3 rawDataSize 78 @@ -1891,7 +2017,7 @@ Database: default Table: over1k_part2_orc #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 1 numRows 16 rawDataSize 415 @@ -1931,7 +2057,7 @@ Database: default Table: over1k_part2_orc #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 1 numRows 3 rawDataSize 78 @@ -2030,43 +2156,80 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: over1k_orc - Statistics: Num rows: 1049 Data size: 23952 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((t = 27) or t is null) (type: boolean) - Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: float) sort order: + Map-reduce partition columns: 
_col0 (type: smallint) - Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col4 (type: tinyint) Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 - Execution mode: vectorized, llap + Execution mode: llap Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), KEY.reducesinkkey0 (type: float), VALUE._col3 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.over1k_part_buck_sort2_orc + Select Operator + expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + outputColumnNames: si, i, b, f, t + Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + keys: t (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 2 Data size: 3400 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Map-reduce partition columns: _col0 (type: tinyint) + Statistics: Num rows: 2 Data size: 3400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + keys: KEY._col0 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 2 Data size: 3528 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col0 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 2 Data size: 3528 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 3528 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -2086,6 +2249,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part_buck_sort2_orc PREHOOK: query: explain insert overwrite table over1k_part_buck_sort2_orc 
partition(t) select si,i,b,f,t from over1k_orc where t is null or t=27 PREHOOK: type: QUERY @@ -2109,19 +2276,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: over1k_orc - Statistics: Num rows: 1049 Data size: 23952 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((t = 27) or t is null) (type: boolean) - Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col4 (type: tinyint), '_bucket_number' (type: string), _col3 (type: float) sort order: +++ Map-reduce partition columns: _col4 (type: tinyint) - Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs @@ -2131,11 +2298,11 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), KEY._col3 (type: float), KEY._col4 (type: tinyint), KEY.'_bucket_number' (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, '_bucket_number' - Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 1342 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false Dp Sort State: PARTITION_BUCKET_SORTED - Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 1342 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2160,6 +2327,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part_buck_sort2_orc PREHOOK: query: insert overwrite table over1k_part_buck_sort2_orc partition(t) select si,i,b,f,t from over1k_orc where t is null or t=27 PREHOOK: type: QUERY @@ -2200,7 +2371,7 @@ Database: default Table: over1k_part_buck_sort2_orc #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 1 numRows 16 rawDataSize 415 @@ -2239,7 +2410,7 @@ Database: default Table: over1k_part_buck_sort2_orc #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 1 numRows 3 rawDataSize 78 @@ -2414,7 +2585,7 @@ Database: default Table: over1k_part_buck_sort2_orc #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + 
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 1 numRows 16 rawDataSize 415 @@ -2453,7 +2624,7 @@ Database: default Table: over1k_part_buck_sort2_orc #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 1 numRows 2 rawDataSize 52 diff --git ql/src/test/results/clientpositive/llap/dynpart_sort_optimization.q.out ql/src/test/results/clientpositive/llap/dynpart_sort_optimization.q.out index 764b58e9c7..78b9a443b8 100644 --- ql/src/test/results/clientpositive/llap/dynpart_sort_optimization.q.out +++ ql/src/test/results/clientpositive/llap/dynpart_sort_optimization.q.out @@ -182,6 +182,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part PREHOOK: query: explain insert overwrite table over1k_part_limit partition(ds="foo", t) select si,i,b,f,t from over1k where t is null or t=27 limit 10 PREHOOK: type: QUERY @@ -276,6 +280,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part_limit PREHOOK: query: explain insert overwrite table over1k_part_buck partition(t) select si,i,b,f,t from over1k where t is null or t=27 PREHOOK: type: QUERY @@ -350,6 +358,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part_buck PREHOOK: query: explain insert overwrite table over1k_part_buck_sort partition(t) select si,i,b,f,t from over1k where t is null or t=27 PREHOOK: type: QUERY @@ -424,6 +436,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part_buck_sort PREHOOK: query: insert overwrite table over1k_part partition(ds="foo", t) select si,i,b,f,t from over1k where t is null or t=27 PREHOOK: type: QUERY @@ -567,6 +583,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part PREHOOK: query: explain insert into table over1k_part_limit partition(ds="foo", t) select si,i,b,f,t from over1k where t is null or t=27 limit 10 PREHOOK: type: QUERY @@ -661,6 +681,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part_limit PREHOOK: query: explain insert into table over1k_part_buck partition(t) select si,i,b,f,t from over1k where t is null or t=27 PREHOOK: type: QUERY @@ -735,6 +759,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part_buck PREHOOK: query: explain insert into table over1k_part_buck_sort partition(t) select si,i,b,f,t from over1k where t is null or t=27 PREHOOK: type: QUERY @@ -809,6 +837,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: 
default.over1k_part_buck_sort PREHOOK: query: insert into table over1k_part partition(ds="foo", t) select si,i,b,f,t from over1k where t is null or t=27 PREHOOK: type: QUERY @@ -901,7 +933,7 @@ Database: default Table: over1k_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 2 numRows 32 rawDataSize 830 @@ -941,7 +973,7 @@ Database: default Table: over1k_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 2 numRows 6 rawDataSize 156 @@ -981,7 +1013,7 @@ Database: default Table: over1k_part_limit #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 2 numRows 14 rawDataSize 362 @@ -1021,7 +1053,7 @@ Database: default Table: over1k_part_limit #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 2 numRows 6 rawDataSize 156 @@ -1060,7 +1092,7 @@ Database: default Table: over1k_part_buck #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 8 numRows 32 rawDataSize 830 @@ -1099,7 +1131,7 @@ Database: default Table: over1k_part_buck #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 4 numRows 6 rawDataSize 156 @@ -1138,7 +1170,7 @@ Database: default Table: over1k_part_buck_sort #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 8 numRows 32 rawDataSize 830 @@ -1177,7 +1209,7 @@ Database: default Table: over1k_part_buck_sort #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 4 numRows 6 rawDataSize 156 @@ -1280,6 +1312,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1316,6 +1349,42 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.over1k_part2 + Select Operator + expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), 'foo' (type: string), _col4 (type: tinyint) + outputColumnNames: si, i, b, f, ds, t + Statistics: Num rows: 1 Data size: 24 Basic 
stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + keys: ds (type: string), t (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: tinyint) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1336,6 +1405,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part2 PREHOOK: query: explain insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from over1k where t is null or t=27 order by i PREHOOK: type: QUERY @@ -1411,6 +1484,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part2 PREHOOK: query: explain insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from (select * from over1k order by i limit 10) tmp where t is null or t=27 PREHOOK: type: QUERY @@ -1507,6 +1584,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part2 PREHOOK: query: explain insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from over1k where t is null or t=27 group by si,i,b,f,t PREHOOK: type: QUERY @@ -1524,6 +1605,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1566,6 +1648,42 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.over1k_part2 + Select Operator + expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), 'foo' (type: string), _col4 (type: tinyint) + 
outputColumnNames: si, i, b, f, ds, t + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + keys: ds (type: string), t (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: tinyint) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1586,6 +1704,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part2 PREHOOK: query: explain insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from over1k where t is null or t=27 group by si,i,b,f,t PREHOOK: type: QUERY @@ -1666,6 +1788,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part2 PREHOOK: query: insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from over1k where t is null or t=27 order by i PREHOOK: type: QUERY @@ -1707,7 +1833,7 @@ Database: default Table: over1k_part2 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 1 numRows 16 rawDataSize 415 @@ -1747,7 +1873,7 @@ Database: default Table: over1k_part2 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 1 numRows 3 rawDataSize 78 @@ -1848,7 +1974,7 @@ Database: default Table: over1k_part2 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE 
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 1 numRows 16 rawDataSize 415 @@ -1888,7 +2014,7 @@ Database: default Table: over1k_part2 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 1 numRows 3 rawDataSize 78 @@ -1987,6 +2113,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2024,6 +2151,42 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.over1k_part_buck_sort2 + Select Operator + expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + outputColumnNames: si, i, b, f, t + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + keys: t (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Map-reduce partition columns: _col0 (type: tinyint) + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + keys: KEY._col0 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col0 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -2043,6 +2206,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part_buck_sort2 PREHOOK: query: explain insert overwrite table over1k_part_buck_sort2 partition(t) select si,i,b,f,t from over1k where t is null or t=27 PREHOOK: type: QUERY @@ -2117,6 +2284,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part_buck_sort2 PREHOOK: query: insert overwrite table over1k_part_buck_sort2 partition(t) select si,i,b,f,t from over1k where t is null or t=27 PREHOOK: type: QUERY 
@@ -2157,7 +2328,7 @@ Database: default Table: over1k_part_buck_sort2 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 1 numRows 16 rawDataSize 415 @@ -2196,7 +2367,7 @@ Database: default Table: over1k_part_buck_sort2 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 1 numRows 3 rawDataSize 78 @@ -2296,7 +2467,7 @@ Database: default Table: over1k_part_buck_sort2 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 1 numRows 16 rawDataSize 415 @@ -2335,7 +2506,7 @@ Database: default Table: over1k_part_buck_sort2 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 1 numRows 3 rawDataSize 78 @@ -2487,6 +2658,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: si, b, f + Column Types: smallint, bigint, float + Table: default.over1k_part3 PREHOOK: query: explain insert overwrite table over1k_part3 partition(s,t,i) select si,b,f,s,t,i from over1k where t=27 PREHOOK: type: QUERY @@ -2563,6 +2738,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: si, b, f + Column Types: smallint, bigint, float + Table: default.over1k_part3 PREHOOK: query: explain insert overwrite table over1k_part3 partition(s,t,i) select si,b,f,s,t,i from over1k where i=100 PREHOOK: type: QUERY @@ -2639,6 +2818,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: si, b, f + Column Types: smallint, bigint, float + Table: default.over1k_part3 PREHOOK: query: explain insert overwrite table over1k_part3 partition(s,t,i) select si,b,f,s,t,i from over1k where i=100 and t=27 PREHOOK: type: QUERY @@ -2715,6 +2898,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: si, b, f + Column Types: smallint, bigint, float + Table: default.over1k_part3 PREHOOK: query: explain insert overwrite table over1k_part3 partition(s,t,i) select si,b,f,s,t,i from over1k where i=100 and s="foo" PREHOOK: type: QUERY @@ -2791,6 +2978,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: si, b, f + Column Types: smallint, bigint, float + Table: default.over1k_part3 PREHOOK: query: explain insert overwrite table over1k_part3 partition(s,t,i) select si,b,f,s,t,i from over1k where t=27 and s="foo" PREHOOK: type: QUERY @@ -2867,6 +3058,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: si, b, f + Column Types: smallint, bigint, float + Table: default.over1k_part3 PREHOOK: query: explain insert overwrite table over1k_part3 partition(s,t,i) select si,b,f,s,t,i from over1k where i=100 and t=27 and s="foo" PREHOOK: type: QUERY @@ -2882,6 +3077,9 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- 
Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -2903,8 +3101,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.over1k_part3 + Select Operator + expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: float), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int) + outputColumnNames: si, b, f, s, t, i + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(si, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + keys: s (type: string), t (type: tinyint), i (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string), KEY._col1 (type: tinyint), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -2926,6 +3160,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: si, b, f + Column Types: smallint, bigint, float + Table: default.over1k_part3 PREHOOK: query: insert overwrite table over1k_part3 partition(s,t,i) select si,b,f,s,t,i from over1k where s="foo" PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/llap/dynpart_sort_optimization2.q.out ql/src/test/results/clientpositive/llap/dynpart_sort_optimization2.q.out index 8b0fb2c44f..271d4bbfaf 100644 --- ql/src/test/results/clientpositive/llap/dynpart_sort_optimization2.q.out +++ ql/src/test/results/clientpositive/llap/dynpart_sort_optimization2.q.out @@ -146,6 +146,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: ss_net_paid_inc_tax, ss_net_profit + Column Types: float, float + Table: default.ss_part PREHOOK: query: insert overwrite table ss_part partition (ss_sold_date_sk) select ss_net_paid_inc_tax, @@ -198,7 +202,7 @@ Database: default Table: ss_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE 
{\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"ss_net_paid_inc_tax\":\"true\",\"ss_net_profit\":\"true\"}} numFiles 1 numRows 11 rawDataSize 151 @@ -256,7 +260,7 @@ Database: default Table: ss_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"ss_net_paid_inc_tax\":\"true\",\"ss_net_profit\":\"true\"}} numFiles 1 numRows 13 rawDataSize 186 @@ -381,6 +385,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: ss_net_paid_inc_tax, ss_net_profit + Column Types: float, float + Table: default.ss_part PREHOOK: query: insert overwrite table ss_part partition (ss_sold_date_sk) select ss_net_paid_inc_tax, @@ -427,7 +435,7 @@ Database: default Table: ss_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"ss_net_paid_inc_tax\":\"true\",\"ss_net_profit\":\"true\"}} numFiles 1 numRows 11 rawDataSize 151 @@ -485,7 +493,7 @@ Database: default Table: ss_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"ss_net_paid_inc_tax\":\"true\",\"ss_net_profit\":\"true\"}} numFiles 1 numRows 13 rawDataSize 186 @@ -601,6 +609,27 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.ss_part + Select Operator + expressions: _col0 (type: float), _col1 (type: float), _col2 (type: int) + outputColumnNames: ss_net_paid_inc_tax, ss_net_profit, ss_sold_date_sk + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(ss_net_paid_inc_tax, 'hll'), compute_stats(ss_net_profit, 'hll') + keys: ss_sold_date_sk (type: int) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -620,6 +649,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: ss_net_paid_inc_tax, ss_net_profit + Column Types: float, float + Table: default.ss_part PREHOOK: query: insert overwrite table ss_part partition (ss_sold_date_sk) select ss_net_paid_inc_tax, @@ -672,7 +705,7 @@ Database: default Table: ss_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"ss_net_paid_inc_tax\":\"true\",\"ss_net_profit\":\"true\"}} numFiles 1 numRows 11 rawDataSize 151 @@ -730,7 +763,7 @@ Database: default Table: ss_part #### A masked pattern was here #### Partition Parameters: - 
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"ss_net_paid_inc_tax\":\"true\",\"ss_net_profit\":\"true\"}} numFiles 1 numRows 13 rawDataSize 186 @@ -797,7 +830,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -813,7 +846,8 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - sort order: + key expressions: _col2 (type: int) + sort order: + Map-reduce partition columns: _col2 (type: int) Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: float), _col1 (type: float), _col2 (type: int) @@ -834,6 +868,27 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.ss_part + Select Operator + expressions: _col0 (type: float), _col1 (type: float), _col2 (type: int) + outputColumnNames: ss_net_paid_inc_tax, ss_net_profit, ss_sold_date_sk + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(ss_net_paid_inc_tax, 'hll'), compute_stats(ss_net_profit, 'hll') + keys: ss_sold_date_sk (type: int) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -853,6 +908,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: ss_net_paid_inc_tax, ss_net_profit + Column Types: float, float + Table: default.ss_part PREHOOK: query: insert overwrite table ss_part partition (ss_sold_date_sk) select ss_net_paid_inc_tax, @@ -899,7 +958,7 @@ Database: default Table: ss_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"ss_net_paid_inc_tax\":\"true\",\"ss_net_profit\":\"true\"}} numFiles 1 numRows 11 rawDataSize 151 @@ -957,7 +1016,7 @@ Database: default Table: ss_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"ss_net_paid_inc_tax\":\"true\",\"ss_net_profit\":\"true\"}} numFiles 1 numRows 13 rawDataSize 186 @@ -1092,42 +1151,63 @@ STAGE PLANS: Map Operator Tree: TableScan alias: ss_orc - Statistics: Num rows: 24 Data size: 288 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((ss_sold_date_sk <= 2452638) and (ss_sold_date_sk >= 2452617)) (type: boolean) - Statistics: Num rows: 1 Data size: 12 Basic 
stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: ss_sold_date_sk (type: int), ss_net_paid_inc_tax (type: float), ss_net_profit (type: float) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: float), _col2 (type: float) sort order: +++ Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 - Execution mode: vectorized, llap + Execution mode: llap Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int), KEY._col1 (type: float), KEY._col2 (type: float) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: float), _col2 (type: float), _col0 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.ss_part_orc + Select Operator + expressions: _col0 (type: float), _col1 (type: float), _col2 (type: int) + outputColumnNames: ss_net_paid_inc_tax, ss_net_profit, ss_sold_date_sk + Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(ss_net_paid_inc_tax, 'hll'), compute_stats(ss_net_profit, 'hll') + keys: ss_sold_date_sk (type: int) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 852 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 852 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 852 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1147,6 +1227,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: ss_net_paid_inc_tax, ss_net_profit + Column Types: float, float + Table: default.ss_part_orc PREHOOK: query: insert overwrite table ss_part_orc partition (ss_sold_date_sk) select ss_net_paid_inc_tax, @@ -1199,7 +1283,7 @@ Database: default Table: ss_part_orc #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE 
{\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"ss_net_paid_inc_tax\":\"true\",\"ss_net_profit\":\"true\"}} numFiles 1 numRows 11 rawDataSize 88 @@ -1257,7 +1341,7 @@ Database: default Table: ss_part_orc #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"ss_net_paid_inc_tax\":\"true\",\"ss_net_profit\":\"true\"}} numFiles 1 numRows 13 rawDataSize 104 @@ -1324,43 +1408,65 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: ss_orc - Statistics: Num rows: 24 Data size: 288 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((ss_sold_date_sk <= 2452638) and (ss_sold_date_sk >= 2452617)) (type: boolean) - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ss_net_paid_inc_tax (type: float), ss_net_profit (type: float), ss_sold_date_sk (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - sort order: + key expressions: _col2 (type: int) + sort order: + Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: float), _col1 (type: float), _col2 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 - Execution mode: vectorized, llap + Execution mode: llap Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: float), VALUE._col1 (type: float), VALUE._col2 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.ss_part_orc + Select Operator + expressions: _col0 (type: float), _col1 (type: float), _col2 (type: int) + outputColumnNames: ss_net_paid_inc_tax, ss_net_profit, ss_sold_date_sk + Statistics: Num rows: 8 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(ss_net_paid_inc_tax, 'hll'), compute_stats(ss_net_profit, 'hll') + keys: ss_sold_date_sk (type: int) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 852 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 852 Basic stats: COMPLETE 
Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 852 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1380,6 +1486,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: ss_net_paid_inc_tax, ss_net_profit + Column Types: float, float + Table: default.ss_part_orc PREHOOK: query: insert overwrite table ss_part_orc partition (ss_sold_date_sk) select ss_net_paid_inc_tax, @@ -1426,7 +1536,7 @@ Database: default Table: ss_part_orc #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"ss_net_paid_inc_tax\":\"true\",\"ss_net_profit\":\"true\"}} numFiles 1 numRows 11 rawDataSize 88 @@ -1484,7 +1594,7 @@ Database: default Table: ss_part_orc #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"ss_net_paid_inc_tax\":\"true\",\"ss_net_profit\":\"true\"}} numFiles 1 numRows 13 rawDataSize 104 @@ -1590,6 +1700,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1616,7 +1727,7 @@ STAGE PLANS: Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 - Execution mode: vectorized, llap + Execution mode: llap Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -1636,6 +1747,42 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.hive13_dp1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + outputColumnNames: k1, k2, day + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(k1, 'hll'), compute_stats(k2, 'hll') + keys: day (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 935 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 935 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 967 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 967 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 967 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1655,6 +1802,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: k1, k2 + Column Types: int, int + Table: default.hive13_dp1 PREHOOK: query: insert overwrite table `hive13_dp1` partition(`day`) select @@ -1721,6 +1872,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1747,7 +1899,7 @@ STAGE PLANS: Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 - Execution mode: vectorized, llap + Execution mode: llap Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -1767,6 +1919,42 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.hive13_dp1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + outputColumnNames: k1, k2, day + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(k1, 'hll'), compute_stats(k2, 'hll') + keys: day (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 935 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 935 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 967 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 967 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 967 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1786,6 +1974,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: k1, k2 + Column Types: int, int + Table: default.hive13_dp1 PREHOOK: query: insert overwrite table `hive13_dp1` partition(`day`) select diff --git ql/src/test/results/clientpositive/llap/except_distinct.q.out ql/src/test/results/clientpositive/llap/except_distinct.q.out index c7573c7bb5..59839e0185 100644 --- ql/src/test/results/clientpositive/llap/except_distinct.q.out +++ ql/src/test/results/clientpositive/llap/except_distinct.q.out @@ -701,42 +701,42 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: value (type: int) outputColumnNames: value - 
Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: value (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 5 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: key - Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: key (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -746,32 +746,32 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: int) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), 2 (type: bigint), _col1 (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col2 (type: bigint), (_col1 * _col2) (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1), sum(_col2) keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 4 
Execution mode: llap @@ -781,17 +781,17 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (((_col1 * 2) = _col2) and (_col1 > 0)) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -803,32 +803,32 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: int) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), 1 (type: bigint), _col1 (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col2 (type: bigint), (_col1 * _col2) (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1), sum(_col2) keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) Union 3 Vertex: Union 3 diff --git ql/src/test/results/clientpositive/llap/explainuser_1.q.out ql/src/test/results/clientpositive/llap/explainuser_1.q.out index 5adf401b25..83c9cf96fd 100644 --- ql/src/test/results/clientpositive/llap/explainuser_1.q.out +++ ql/src/test/results/clientpositive/llap/explainuser_1.q.out @@ -44,6 +44,9 @@ POSTHOOK: query: explain insert overwrite table src_orc_merge_test_part partitio POSTHOOK: type: QUERY Plan optimized by CBO. 
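The estimate shifts in the except_distinct and explainuser hunks (Col:NONE becoming Col:COMPLETE, widths such as 204 dropping to 16) come from the optimizer now finding column statistics on tables that earlier test statements populated. The same shift can be reproduced manually on any table; a sketch, assuming a throwaway table named demo:

    CREATE TABLE demo (key INT, value INT);
    -- Without column stats the planner falls back to guessed average widths
    -- and the plan annotates scans with Col:NONE.
    EXPLAIN SELECT sum(key), sum(value) FROM demo;
    ANALYZE TABLE demo COMPUTE STATISTICS FOR COLUMNS;
    -- With stats present the same query plans with Col:COMPLETE and real widths.
    EXPLAIN SELECT sum(key), sum(value) FROM demo;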
+Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) + Stage-3 Stats Work{} Stage-0 @@ -52,13 +55,26 @@ Stage-3 Stage-2 Dependency Collection{} Stage-1 - Map 1 llap - File Output Operator [FS_3] - table:{"name:":"default.src_orc_merge_test_part"} - Select Operator [SEL_1] (rows=500 width=95) - Output:["_col0","_col1"] - TableScan [TS_0] (rows=500 width=178) - default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + Reducer 2 llap + File Output Operator [FS_6] + Select Operator [SEL_5] (rows=1 width=1077) + Output:["_col0","_col1","_col2","_col3"] + Group By Operator [GBY_4] (rows=1 width=1077) + Output:["_col0","_col1","_col2","_col3"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"],keys:KEY._col0, KEY._col1 + <-Map 1 [SIMPLE_EDGE] llap + File Output Operator [FS_3] + table:{"name:":"default.src_orc_merge_test_part"} + Select Operator [SEL_1] (rows=500 width=95) + Output:["_col0","_col1"] + TableScan [TS_0] (rows=500 width=178) + default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + SHUFFLE [RS_3] + PartitionCols:_col0, _col1 + Group By Operator [GBY_2] (rows=1 width=1061) + Output:["_col0","_col1","_col2","_col3"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"],keys:ds, ts + Select Operator [SEL_1] (rows=500 width=292) + Output:["key","value","ds","ts"] + Please refer to the previous Select Operator [SEL_1] PREHOOK: query: insert overwrite table src_orc_merge_test_part partition(ds='2012-01-03', ts='2012-01-03+14:46:31') select * from src PREHOOK: type: QUERY @@ -78,6 +94,7 @@ Plan optimized by CBO. Vertex dependency in root stage Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) Stage-3 Stats Work{} @@ -87,23 +104,36 @@ Stage-3 Stage-2 Dependency Collection{} Stage-1 - Reducer 2 llap - File Output Operator [FS_7] - table:{"name:":"default.src_orc_merge_test_part"} - Select Operator [SEL_6] (rows=100 width=95) - Output:["_col0","_col1"] - Limit [LIM_5] (rows=100 width=178) - Number of rows:100 - Select Operator [SEL_4] (rows=100 width=178) - Output:["_col0","_col1"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_3] - Limit [LIM_2] (rows=100 width=178) + Reducer 3 llap + File Output Operator [FS_6] + Select Operator [SEL_5] (rows=1 width=1077) + Output:["_col0","_col1","_col2","_col3"] + Group By Operator [GBY_4] (rows=1 width=1077) + Output:["_col0","_col1","_col2","_col3"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"],keys:KEY._col0, KEY._col1 + <-Reducer 2 [SIMPLE_EDGE] llap + File Output Operator [FS_7] + table:{"name:":"default.src_orc_merge_test_part"} + Select Operator [SEL_6] (rows=100 width=95) + Output:["_col0","_col1"] + Limit [LIM_5] (rows=100 width=178) Number of rows:100 - Select Operator [SEL_1] (rows=500 width=178) + Select Operator [SEL_4] (rows=100 width=178) Output:["_col0","_col1"] - TableScan [TS_0] (rows=500 width=178) - default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_3] + Limit [LIM_2] (rows=100 width=178) + Number of rows:100 + Select Operator [SEL_1] (rows=500 width=178) + Output:["_col0","_col1"] + TableScan [TS_0] (rows=500 width=178) + default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + SHUFFLE [RS_3] + PartitionCols:_col0, _col1 + Group By Operator [GBY_2] (rows=1 width=1061) + Output:["_col0","_col1","_col2","_col3"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"],keys:ds, ts + 
Select Operator [SEL_1] (rows=100 width=292) + Output:["key","value","ds","ts"] + Please refer to the previous Select Operator [SEL_6] PREHOOK: query: explain select count(1) from src_orc_merge_test_part where ds='2012-01-03' and ts='2012-01-03+14:46:31' PREHOOK: type: QUERY @@ -130,16 +160,16 @@ Stage-0 Stage-1 Reducer 2 llap File Output Operator [FS_8] - Group By Operator [GBY_6] (rows=1 width=204) + Group By Operator [GBY_6] (rows=1 width=16) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"] <-Map 1 [CUSTOM_SIMPLE_EDGE] llap PARTITION_ONLY_SHUFFLE [RS_5] - Group By Operator [GBY_4] (rows=1 width=204) + Group By Operator [GBY_4] (rows=1 width=16) Output:["_col0","_col1"],aggregations:["sum(_col0)","sum(_col1)"] - Select Operator [SEL_2] (rows=500 width=272) + Select Operator [SEL_2] (rows=500 width=95) Output:["_col0","_col1"] - TableScan [TS_0] (rows=500 width=272) - default@src_orc_merge_test_part,src_orc_merge_test_part,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + TableScan [TS_0] (rows=500 width=95) + default@src_orc_merge_test_part,src_orc_merge_test_part,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] PREHOOK: query: alter table src_orc_merge_test_part partition (ds='2012-01-03', ts='2012-01-03+14:46:31') concatenate PREHOOK: type: ALTER_PARTITION_MERGE @@ -189,16 +219,16 @@ Stage-0 Stage-1 Reducer 2 llap File Output Operator [FS_8] - Group By Operator [GBY_6] (rows=1 width=204) + Group By Operator [GBY_6] (rows=1 width=16) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"] <-Map 1 [CUSTOM_SIMPLE_EDGE] llap PARTITION_ONLY_SHUFFLE [RS_5] - Group By Operator [GBY_4] (rows=1 width=204) + Group By Operator [GBY_4] (rows=1 width=16) Output:["_col0","_col1"],aggregations:["sum(_col0)","sum(_col1)"] - Select Operator [SEL_2] (rows=500 width=272) + Select Operator [SEL_2] (rows=500 width=95) Output:["_col0","_col1"] - TableScan [TS_0] (rows=500 width=272) - default@src_orc_merge_test_part,src_orc_merge_test_part,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + TableScan [TS_0] (rows=500 width=95) + default@src_orc_merge_test_part,src_orc_merge_test_part,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] PREHOOK: query: drop table src_orc_merge_test_part PREHOOK: type: DROPTABLE @@ -3030,14 +3060,14 @@ Stage-0 Stage-1 Reducer 2 llap File Output Operator [FS_6] - Group By Operator [GBY_4] (rows=1 width=204) + Group By Operator [GBY_4] (rows=1 width=16) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"] <-Map 1 [CUSTOM_SIMPLE_EDGE] llap PARTITION_ONLY_SHUFFLE [RS_3] - Select Operator [SEL_1] (rows=5 width=188) + Select Operator [SEL_1] (rows=5 width=93) Output:["_col0","_col1"] - TableScan [TS_0] (rows=5 width=188) - default@tgt_rc_merge_test,tgt_rc_merge_test,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + TableScan [TS_0] (rows=5 width=93) + default@tgt_rc_merge_test,tgt_rc_merge_test,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] PREHOOK: query: alter table tgt_rc_merge_test concatenate PREHOOK: type: ALTER_TABLE_MERGE @@ -3102,14 +3132,14 @@ Stage-0 Stage-1 Reducer 2 llap File Output Operator [FS_6] - Group By Operator [GBY_4] (rows=1 width=204) + Group By Operator [GBY_4] (rows=1 width=16) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"] <-Map 1 [CUSTOM_SIMPLE_EDGE] llap PARTITION_ONLY_SHUFFLE [RS_3] - Select Operator [SEL_1] (rows=5 width=188) + Select Operator [SEL_1] (rows=5 width=93) Output:["_col0","_col1"] - TableScan [TS_0] (rows=5 width=188) - 
default@tgt_rc_merge_test,tgt_rc_merge_test,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + TableScan [TS_0] (rows=5 width=93) + default@tgt_rc_merge_test,tgt_rc_merge_test,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] PREHOOK: query: drop table src_rc_merge_test PREHOOK: type: DROPTABLE @@ -3753,20 +3783,20 @@ Stage-0 Stage-1 Reducer 2 llap File Output Operator [FS_6] - Select Operator [SEL_5] (rows=28 width=8) + Select Operator [SEL_5] (rows=48 width=15) Output:["_col0","_col1","_col2","_col3"] - Merge Join Operator [MERGEJOIN_7] (rows=28 width=8) + Merge Join Operator [MERGEJOIN_7] (rows=48 width=15) Conds:RS_2.key=RS_3.key(Inner),Output:["_col0","_col1","_col5","_col6"] <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_2] PartitionCols:key - TableScan [TS_0] (rows=26 width=8) - default@smb_input1,a,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + TableScan [TS_0] (rows=26 width=7) + default@smb_input1,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] <-Map 3 [SIMPLE_EDGE] llap SHUFFLE [RS_3] PartitionCols:key - TableScan [TS_1] (rows=26 width=8) - default@smb_input1,b,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + TableScan [TS_1] (rows=26 width=7) + default@smb_input1,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] PREHOOK: query: explain select /*+ MAPJOIN(a) */ * FROM smb_input1 a JOIN smb_input1 b ON a.key <=> b.key AND a.value <=> b.value PREHOOK: type: QUERY @@ -3783,20 +3813,20 @@ Stage-0 Stage-1 Reducer 2 llap File Output Operator [FS_6] - Select Operator [SEL_5] (rows=28 width=8) + Select Operator [SEL_5] (rows=48 width=15) Output:["_col0","_col1","_col2","_col3"] - Merge Join Operator [MERGEJOIN_7] (rows=28 width=8) + Merge Join Operator [MERGEJOIN_7] (rows=48 width=15) Conds:RS_2.key, value=RS_3.key, value(Inner),Output:["_col0","_col1","_col5","_col6"] <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_2] PartitionCols:key, value - TableScan [TS_0] (rows=26 width=8) - default@smb_input1,a,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + TableScan [TS_0] (rows=26 width=7) + default@smb_input1,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] <-Map 3 [SIMPLE_EDGE] llap SHUFFLE [RS_3] PartitionCols:key, value - TableScan [TS_1] (rows=26 width=8) - default@smb_input1,b,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + TableScan [TS_1] (rows=26 width=7) + default@smb_input1,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] PREHOOK: query: explain select /*+ MAPJOIN(a) */ * FROM smb_input1 a RIGHT OUTER JOIN smb_input1 b ON a.key <=> b.key PREHOOK: type: QUERY @@ -3813,20 +3843,20 @@ Stage-0 Stage-1 Reducer 2 llap File Output Operator [FS_6] - Select Operator [SEL_5] (rows=28 width=8) + Select Operator [SEL_5] (rows=48 width=15) Output:["_col0","_col1","_col2","_col3"] - Merge Join Operator [MERGEJOIN_7] (rows=28 width=8) + Merge Join Operator [MERGEJOIN_7] (rows=48 width=15) Conds:RS_2.key=RS_3.key(Right Outer),Output:["_col0","_col1","_col5","_col6"] <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_2] PartitionCols:key - TableScan [TS_0] (rows=26 width=8) - default@smb_input1,a,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + TableScan [TS_0] (rows=26 width=7) + default@smb_input1,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] <-Map 3 [SIMPLE_EDGE] llap SHUFFLE [RS_3] PartitionCols:key - TableScan [TS_1] (rows=26 width=8) - default@smb_input1,b,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + TableScan [TS_1] (rows=26 width=7) + default@smb_input1,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] PREHOOK: query: explain select /*+ MAPJOIN(b) */ * FROM smb_input1 a JOIN smb_input1 b ON a.key <=> b.key 
PREHOOK: type: QUERY @@ -3843,20 +3873,20 @@ Stage-0 Stage-1 Reducer 2 llap File Output Operator [FS_6] - Select Operator [SEL_5] (rows=28 width=8) + Select Operator [SEL_5] (rows=48 width=15) Output:["_col0","_col1","_col2","_col3"] - Merge Join Operator [MERGEJOIN_7] (rows=28 width=8) + Merge Join Operator [MERGEJOIN_7] (rows=48 width=15) Conds:RS_2.key=RS_3.key(Inner),Output:["_col0","_col1","_col5","_col6"] <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_2] PartitionCols:key - TableScan [TS_0] (rows=26 width=8) - default@smb_input1,a,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + TableScan [TS_0] (rows=26 width=7) + default@smb_input1,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] <-Map 3 [SIMPLE_EDGE] llap SHUFFLE [RS_3] PartitionCols:key - TableScan [TS_1] (rows=26 width=8) - default@smb_input1,b,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + TableScan [TS_1] (rows=26 width=7) + default@smb_input1,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] PREHOOK: query: explain select /*+ MAPJOIN(b) */ * FROM smb_input1 a LEFT OUTER JOIN smb_input1 b ON a.key <=> b.key PREHOOK: type: QUERY @@ -3873,20 +3903,20 @@ Stage-0 Stage-1 Reducer 2 llap File Output Operator [FS_6] - Select Operator [SEL_5] (rows=28 width=8) + Select Operator [SEL_5] (rows=48 width=15) Output:["_col0","_col1","_col2","_col3"] - Merge Join Operator [MERGEJOIN_7] (rows=28 width=8) + Merge Join Operator [MERGEJOIN_7] (rows=48 width=15) Conds:RS_2.key=RS_3.key(Left Outer),Output:["_col0","_col1","_col5","_col6"] <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_2] PartitionCols:key - TableScan [TS_0] (rows=26 width=8) - default@smb_input1,a,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + TableScan [TS_0] (rows=26 width=7) + default@smb_input1,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] <-Map 3 [SIMPLE_EDGE] llap SHUFFLE [RS_3] PartitionCols:key - TableScan [TS_1] (rows=26 width=8) - default@smb_input1,b,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + TableScan [TS_1] (rows=26 width=7) + default@smb_input1,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] PREHOOK: query: drop table sales PREHOOK: type: DROPTABLE @@ -4834,8 +4864,12 @@ Plan not optimized by CBO. 
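The smb_input1 hunks above show the second-order effect on join planning: with per-column NDV and average width available (Col:COMPLETE), the merge-join estimate moves from rows=28 width=8 to rows=48 width=15. A hedged sketch of the manual equivalent, using an illustrative table rather than the test's smb_input1:

    CREATE TABLE smb_demo (key INT, value INT);
    ANALYZE TABLE smb_demo COMPUTE STATISTICS FOR COLUMNS key, value;
    -- The join estimator can now use the NDV recorded for key instead of a
    -- row-count fallback, which changes the Merge Join row estimate.
    EXPLAIN SELECT * FROM smb_demo a JOIN smb_demo b ON a.key <=> b.key;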
Vertex dependency in root stage Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Reducer 2 (SIMPLE_EDGE) -Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) +Reducer 6 <- Reducer 2 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Reducer 8 (CUSTOM_SIMPLE_EDGE) Stage-4 Stats Work{} @@ -4845,49 +4879,75 @@ Stage-4 Stage-3 Dependency Collection{} Stage-2 - Reducer 3 llap - File Output Operator [FS_9] - table:{"name:":"default.part_4"} - Select Operator [SEL_7] (rows=26 width=239) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - PTF Operator [PTF_6] (rows=26 width=499) - Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS FIRST","partition by:":"_col2"}] - Select Operator [SEL_5] (rows=26 width=499) - Output:["_col1","_col2","_col5","_col7"] - <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_4] - PartitionCols:_col2 - PTF Operator [PTF_3] (rows=26 width=499) - Function definitions:[{},{"Partition table definition":{"name:":"noop","order by:":"_col1 ASC NULLS FIRST","partition by:":"_col2"}}] - Select Operator [SEL_2] (rows=26 width=499) - Output:["_col1","_col2","_col5","_col7"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_1] - PartitionCols:p_mfgr - TableScan [TS_0] (rows=26 width=231) - default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_mfgr","p_name","p_retailprice","p_size"] Reducer 5 llap - File Output Operator [FS_20] - table:{"name:":"default.part_5"} - Select Operator [SEL_17] (rows=26 width=247) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - PTF Operator [PTF_16] (rows=26 width=499) - Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col3 ASC NULLS FIRST, _col2 ASC NULLS FIRST","partition by:":"_col3"}] - Select Operator [SEL_15] (rows=26 width=499) - Output:["_col0","_col2","_col3","_col6"] - <-Reducer 4 [SIMPLE_EDGE] llap - SHUFFLE [RS_14] - PartitionCols:_col2 - Select Operator [SEL_13] (rows=26 width=491) - Output:["sum_window_0","_col1","_col2","_col5"] - PTF Operator [PTF_12] (rows=26 width=491) - Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col5 ASC NULLS FIRST","partition by:":"_col2"}] - Select Operator [SEL_11] (rows=26 width=491) - Output:["_col1","_col2","_col5"] + File Output Operator [FS_7] + Group By Operator [GBY_5] (rows=1 width=2640) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)","compute_stats(VALUE._col2)","compute_stats(VALUE._col3)","compute_stats(VALUE._col4)","compute_stats(VALUE._col5)"] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_4] + Group By Operator [GBY_3] (rows=1 width=2576) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')","compute_stats(VALUE._col3, 'hll')","compute_stats(VALUE._col4, 'hll')","compute_stats(VALUE._col5, 'hll')","compute_stats(VALUE._col6, 'hll')"] + <-Reducer 3 [CUSTOM_SIMPLE_EDGE] llap + File Output Operator [FS_9] + table:{"name:":"default.part_4"} + Select Operator [SEL_7] (rows=26 width=239) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + PTF Operator [PTF_6] (rows=26 width=499) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS FIRST","partition by:":"_col2"}] + Select Operator [SEL_5] 
(rows=26 width=499) + Output:["_col1","_col2","_col5","_col7"] <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_10] + SHUFFLE [RS_4] + PartitionCols:_col2 + PTF Operator [PTF_3] (rows=26 width=499) + Function definitions:[{},{"Partition table definition":{"name:":"noop","order by:":"_col1 ASC NULLS FIRST","partition by:":"_col2"}}] + Select Operator [SEL_2] (rows=26 width=499) + Output:["_col1","_col2","_col5","_col7"] + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_1] + PartitionCols:p_mfgr + TableScan [TS_0] (rows=26 width=231) + default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_mfgr","p_name","p_retailprice","p_size"] + PARTITION_ONLY_SHUFFLE [RS_2] + PartitionCols:rand() + Select Operator [SEL_1] (rows=26 width=239) + Output:["p_mfgr","p_name","p_size","r","dr","s"] + Please refer to the previous Select Operator [SEL_7] + Reducer 9 llap + File Output Operator [FS_7] + Group By Operator [GBY_5] (rows=1 width=3520) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)","compute_stats(VALUE._col2)","compute_stats(VALUE._col3)","compute_stats(VALUE._col4)","compute_stats(VALUE._col5)","compute_stats(VALUE._col6)","compute_stats(VALUE._col7)"] + <-Reducer 8 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_4] + Group By Operator [GBY_3] (rows=1 width=3424) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')","compute_stats(VALUE._col3, 'hll')","compute_stats(VALUE._col4, 'hll')","compute_stats(VALUE._col5, 'hll')","compute_stats(VALUE._col6, 'hll')","compute_stats(VALUE._col7, 'hll')","compute_stats(VALUE._col8, 'hll')"] + <-Reducer 7 [CUSTOM_SIMPLE_EDGE] llap + File Output Operator [FS_20] + table:{"name:":"default.part_5"} + Select Operator [SEL_17] (rows=26 width=247) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + PTF Operator [PTF_16] (rows=26 width=499) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col3 ASC NULLS FIRST, _col2 ASC NULLS FIRST","partition by:":"_col3"}] + Select Operator [SEL_15] (rows=26 width=499) + Output:["_col0","_col2","_col3","_col6"] + <-Reducer 6 [SIMPLE_EDGE] llap + SHUFFLE [RS_14] PartitionCols:_col2 - Please refer to the previous PTF Operator [PTF_3] + Select Operator [SEL_13] (rows=26 width=491) + Output:["sum_window_0","_col1","_col2","_col5"] + PTF Operator [PTF_12] (rows=26 width=491) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col5 ASC NULLS FIRST","partition by:":"_col2"}] + Select Operator [SEL_11] (rows=26 width=491) + Output:["_col1","_col2","_col5"] + <-Reducer 2 [SIMPLE_EDGE] llap + SHUFFLE [RS_10] + PartitionCols:_col2 + Please refer to the previous PTF Operator [PTF_3] + PARTITION_ONLY_SHUFFLE [RS_2] + PartitionCols:rand() + Select Operator [SEL_1] (rows=26 width=247) + Output:["p_mfgr","p_name","p_size","s2","r","dr","cud","fv1"] + Please refer to the previous Select Operator [SEL_17] Stage-5 Stats Work{} Stage-1 @@ -5251,7 +5311,9 @@ POSTHOOK: type: QUERY Plan optimized by CBO. 
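The windowing diffs above show the pattern this patch stamps onto every INSERT OVERWRITE plan: a second branch re-selects the written columns, shuffles them on rand() through a PARTITION_ONLY_SHUFFLE, pre-aggregates compute_stats(..., 'hll') partials in one reducer, and merges them (mergepartial) in a final reducer that feeds the Stats Work stage. A minimal HiveQL sketch for reproducing such a plan, using the same src/dest_j1 tables as the next hunk (the per-session SET is an assumption for illustration, not part of the golden files):

    -- Column stats autogather is what this patch enables by default;
    -- toggling it per session makes the extra compute_stats branch
    -- appear or disappear in EXPLAIN output.
    SET hive.stats.column.autogather=true;
    EXPLAIN
    FROM src src1 JOIN src src2 ON (src1.key = src2.key)
    INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value;

The join plan in the hunk that follows is this same query, taken from the golden file.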
 Vertex dependency in root stage
-Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
+Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
 Stage-3
   Stats Work{}
@@ -5261,31 +5323,44 @@ Stage-3
 Stage-2
   Dependency Collection{}
     Stage-1
-      Reducer 2 llap
-      File Output Operator [FS_11]
-        table:{"name:":"default.dest_j1"}
-        Select Operator [SEL_9] (rows=809 width=95)
-          Output:["_col0","_col1"]
-          Merge Join Operator [MERGEJOIN_16] (rows=809 width=178)
-            Conds:RS_6._col0=RS_7._col0(Inner),Output:["_col0","_col2"]
-          <-Map 1 [SIMPLE_EDGE] llap
-            SHUFFLE [RS_6]
-              PartitionCols:_col0
-              Select Operator [SEL_2] (rows=500 width=87)
-                Output:["_col0"]
-                Filter Operator [FIL_14] (rows=500 width=87)
-                  predicate:key is not null
-                  TableScan [TS_0] (rows=500 width=87)
-                    default@src,src1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"]
-          <-Map 3 [SIMPLE_EDGE] llap
-            SHUFFLE [RS_7]
-              PartitionCols:_col0
-              Select Operator [SEL_5] (rows=500 width=178)
-                Output:["_col0","_col1"]
-                Filter Operator [FIL_15] (rows=500 width=178)
-                  predicate:key is not null
-                  TableScan [TS_3] (rows=500 width=178)
-                    default@src,src2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+      Reducer 4 llap
+      File Output Operator [FS_7]
+        Group By Operator [GBY_5] (rows=1 width=880)
+          Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"]
+        <-Reducer 3 [CUSTOM_SIMPLE_EDGE] llap
+          PARTITION_ONLY_SHUFFLE [RS_4]
+            Group By Operator [GBY_3] (rows=1 width=864)
+              Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')"]
+            <-Reducer 2 [CUSTOM_SIMPLE_EDGE] llap
+              File Output Operator [FS_11]
+                table:{"name:":"default.dest_j1"}
+                Select Operator [SEL_9] (rows=809 width=95)
+                  Output:["_col0","_col1"]
+                  Merge Join Operator [MERGEJOIN_16] (rows=809 width=178)
+                    Conds:RS_6._col0=RS_7._col0(Inner),Output:["_col0","_col2"]
+                  <-Map 1 [SIMPLE_EDGE] llap
+                    SHUFFLE [RS_6]
+                      PartitionCols:_col0
+                      Select Operator [SEL_2] (rows=500 width=87)
+                        Output:["_col0"]
+                        Filter Operator [FIL_14] (rows=500 width=87)
+                          predicate:key is not null
+                          TableScan [TS_0] (rows=500 width=87)
+                            default@src,src1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"]
+                  <-Map 5 [SIMPLE_EDGE] llap
+                    SHUFFLE [RS_7]
+                      PartitionCols:_col0
+                      Select Operator [SEL_5] (rows=500 width=178)
+                        Output:["_col0","_col1"]
+                        Filter Operator [FIL_15] (rows=500 width=178)
+                          predicate:key is not null
+                          TableScan [TS_3] (rows=500 width=178)
+                            default@src,src2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+              PARTITION_ONLY_SHUFFLE [RS_2]
+                PartitionCols:rand()
+                Select Operator [SEL_1] (rows=809 width=95)
+                  Output:["key","value"]
+                  Please refer to the previous Select Operator [SEL_9]
 PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key)
 INSERT OVERWRITE TABLE dest_j1 select src1.key, src2.value
diff --git ql/src/test/results/clientpositive/llap/explainuser_2.q.out ql/src/test/results/clientpositive/llap/explainuser_2.q.out
index 6b678f3d4c..4f8e327004 100644
--- ql/src/test/results/clientpositive/llap/explainuser_2.q.out
+++ ql/src/test/results/clientpositive/llap/explainuser_2.q.out
@@ -1727,36 +1727,34 @@ Stage-0
 Stage-1
   Reducer 2 llap
   File Output Operator [FS_16]
-    Select Operator [SEL_15] (rows=292 width=10)
-      Output:["_col0","_col1"]
-      Merge Join Operator [MERGEJOIN_27] (rows=292 width=10)
-        Conds:RS_12._col2=RS_13._col1(Inner),Output:["_col1","_col2"]
-      <-Map 1 [SIMPLE_EDGE] llap
-        SHUFFLE [RS_12]
-          PartitionCols:_col2
-          Merge
Join Operator [MERGEJOIN_25] (rows=266 width=10) - Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col1","_col2"] - <-Select Operator [SEL_5] (rows=242 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_23] (rows=242 width=10) - predicate:(key is not null and value is not null) - TableScan [TS_3] (rows=242 width=10) - default@tab,s1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Select Operator [SEL_2] (rows=242 width=10) - Output:["_col0"] - Filter Operator [FIL_22] (rows=242 width=10) - predicate:key is not null - TableScan [TS_0] (rows=242 width=10) - default@tab,s3,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Map 4 [SIMPLE_EDGE] llap - SHUFFLE [RS_13] - PartitionCols:_col1 - Select Operator [SEL_8] (rows=242 width=10) - Output:["_col1"] - Filter Operator [FIL_24] (rows=242 width=10) - predicate:value is not null - TableScan [TS_6] (rows=242 width=10) - default@tab,s2,Tbl:COMPLETE,Col:NONE,Output:["value"] + Merge Join Operator [MERGEJOIN_27] (rows=292 width=10) + Conds:RS_12._col1=RS_13._col1(Inner),Output:["_col0","_col1"] + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_12] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_25] (rows=266 width=10) + Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col0","_col1"] + <-Select Operator [SEL_5] (rows=242 width=10) + Output:["_col0"] + Filter Operator [FIL_23] (rows=242 width=10) + predicate:key is not null + TableScan [TS_3] (rows=242 width=10) + default@tab,s3,Tbl:COMPLETE,Col:NONE,Output:["key"] + <-Select Operator [SEL_2] (rows=242 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_22] (rows=242 width=10) + predicate:(key is not null and value is not null) + TableScan [TS_0] (rows=242 width=10) + default@tab,s1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Map 4 [SIMPLE_EDGE] llap + SHUFFLE [RS_13] + PartitionCols:_col1 + Select Operator [SEL_8] (rows=242 width=10) + Output:["_col1"] + Filter Operator [FIL_24] (rows=242 width=10) + predicate:value is not null + TableScan [TS_6] (rows=242 width=10) + default@tab,s2,Tbl:COMPLETE,Col:NONE,Output:["value"] PREHOOK: query: explain select s1.key as key, s1.value as value from tab s1 join tab2 s3 on s1.key=s3.key @@ -1804,36 +1802,34 @@ Stage-0 Stage-1 Reducer 2 llap File Output Operator [FS_16] - Select Operator [SEL_15] (rows=292 width=10) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_27] (rows=292 width=10) - Conds:RS_12._col2=RS_13._col1(Inner),Output:["_col1","_col2"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_12] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_25] (rows=266 width=10) - Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col1","_col2"] - <-Select Operator [SEL_5] (rows=242 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_23] (rows=242 width=10) - predicate:(key is not null and value is not null) - TableScan [TS_3] (rows=242 width=10) - default@tab,s1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Select Operator [SEL_2] (rows=242 width=10) - Output:["_col0"] - Filter Operator [FIL_22] (rows=242 width=10) - predicate:key is not null - TableScan [TS_0] (rows=242 width=10) - default@tab2,s3,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Map 4 [SIMPLE_EDGE] llap - SHUFFLE [RS_13] - PartitionCols:_col1 - Select Operator [SEL_8] (rows=242 width=10) - Output:["_col1"] - Filter Operator [FIL_24] (rows=242 width=10) - predicate:value is not null - TableScan [TS_6] (rows=242 width=10) - default@tab2,s2,Tbl:COMPLETE,Col:NONE,Output:["value"] + Merge Join Operator [MERGEJOIN_27] (rows=292 width=10) + 
Conds:RS_12._col1=RS_13._col1(Inner),Output:["_col0","_col1"] + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_12] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_25] (rows=266 width=10) + Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col0","_col1"] + <-Select Operator [SEL_5] (rows=242 width=10) + Output:["_col0"] + Filter Operator [FIL_23] (rows=242 width=10) + predicate:key is not null + TableScan [TS_3] (rows=242 width=10) + default@tab2,s3,Tbl:COMPLETE,Col:NONE,Output:["key"] + <-Select Operator [SEL_2] (rows=242 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_22] (rows=242 width=10) + predicate:(key is not null and value is not null) + TableScan [TS_0] (rows=242 width=10) + default@tab,s1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Map 4 [SIMPLE_EDGE] llap + SHUFFLE [RS_13] + PartitionCols:_col1 + Select Operator [SEL_8] (rows=242 width=10) + Output:["_col1"] + Filter Operator [FIL_24] (rows=242 width=10) + predicate:value is not null + TableScan [TS_6] (rows=242 width=10) + default@tab2,s2,Tbl:COMPLETE,Col:NONE,Output:["value"] PREHOOK: query: explain select count(*) from (select s1.key as key, s1.value as value from tab s1 join tab s3 on s1.key=s3.key @@ -1962,36 +1958,34 @@ Stage-0 <-Reducer 2 [CONTAINS] llap Reduce Output Operator [RS_24] PartitionCols:_col0 - Select Operator [SEL_15] (rows=292 width=10) - Output:["_col0"] - Merge Join Operator [MERGEJOIN_50] (rows=292 width=10) - Conds:RS_12._col2=RS_13._col1(Inner),Output:["_col1"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_12] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_48] (rows=266 width=10) - Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col1","_col2"] - <-Select Operator [SEL_5] (rows=242 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_44] (rows=242 width=10) - predicate:(key is not null and value is not null) - TableScan [TS_3] (rows=242 width=10) - default@tab,s1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Select Operator [SEL_2] (rows=242 width=10) - Output:["_col0"] - Filter Operator [FIL_43] (rows=242 width=10) - predicate:key is not null - TableScan [TS_0] (rows=242 width=10) - default@tab,s3,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Map 7 [SIMPLE_EDGE] llap - SHUFFLE [RS_13] - PartitionCols:_col1 - Select Operator [SEL_8] (rows=242 width=10) - Output:["_col1"] - Filter Operator [FIL_45] (rows=242 width=10) - predicate:value is not null - TableScan [TS_6] (rows=242 width=10) - default@tab,s2,Tbl:COMPLETE,Col:NONE,Output:["value"] + Merge Join Operator [MERGEJOIN_50] (rows=292 width=10) + Conds:RS_12._col1=RS_13._col1(Inner),Output:["_col0"] + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_12] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_48] (rows=266 width=10) + Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col0","_col1"] + <-Select Operator [SEL_5] (rows=242 width=10) + Output:["_col0"] + Filter Operator [FIL_44] (rows=242 width=10) + predicate:key is not null + TableScan [TS_3] (rows=242 width=10) + default@tab,s3,Tbl:COMPLETE,Col:NONE,Output:["key"] + <-Select Operator [SEL_2] (rows=242 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_43] (rows=242 width=10) + predicate:(key is not null and value is not null) + TableScan [TS_0] (rows=242 width=10) + default@tab,s1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Map 7 [SIMPLE_EDGE] llap + SHUFFLE [RS_13] + PartitionCols:_col1 + Select Operator [SEL_8] (rows=242 width=10) + Output:["_col1"] + Filter Operator [FIL_45] (rows=242 width=10) + predicate:value is not null + TableScan [TS_6] (rows=242 
width=10) + default@tab,s2,Tbl:COMPLETE,Col:NONE,Output:["value"] PREHOOK: query: explain SELECT x.key, y.value @@ -2592,19 +2586,22 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Map 6 (BROADCAST_EDGE), Union 2 (CONTAINS) -Map 10 <- Union 11 (CONTAINS) -Map 12 <- Union 11 (CONTAINS) -Map 13 <- Union 11 (CONTAINS) -Map 16 <- Map 6 (BROADCAST_EDGE), Union 4 (CONTAINS) -Map 17 <- Map 6 (BROADCAST_EDGE), Union 4 (CONTAINS) -Map 18 <- Map 6 (BROADCAST_EDGE), Union 4 (CONTAINS) -Map 19 <- Map 6 (BROADCAST_EDGE), Union 4 (CONTAINS) -Map 5 <- Map 6 (BROADCAST_EDGE), Union 2 (CONTAINS) -Map 6 <- Map 15 (BROADCAST_EDGE) -Reducer 3 <- Map 7 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE), Union 4 (CONTAINS) -Reducer 8 <- Map 7 (SIMPLE_EDGE), Union 11 (SIMPLE_EDGE) -Reducer 9 <- Map 14 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE), Union 4 (CONTAINS) +Map 1 <- Map 9 (BROADCAST_EDGE), Union 2 (CONTAINS) +Map 13 <- Union 14 (CONTAINS) +Map 15 <- Union 14 (CONTAINS) +Map 16 <- Union 14 (CONTAINS) +Map 19 <- Map 9 (BROADCAST_EDGE), Union 4 (CONTAINS) +Map 20 <- Map 9 (BROADCAST_EDGE), Union 4 (CONTAINS) +Map 21 <- Map 9 (BROADCAST_EDGE), Union 4 (CONTAINS) +Map 22 <- Map 9 (BROADCAST_EDGE), Union 4 (CONTAINS) +Map 8 <- Map 9 (BROADCAST_EDGE), Union 2 (CONTAINS) +Map 9 <- Map 18 (BROADCAST_EDGE) +Reducer 11 <- Map 10 (SIMPLE_EDGE), Union 14 (SIMPLE_EDGE) +Reducer 12 <- Map 17 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE), Union 4 (CONTAINS) +Reducer 3 <- Map 10 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE), Union 4 (CONTAINS) +Reducer 5 <- Union 4 (CUSTOM_SIMPLE_EDGE) +Reducer 6 <- Union 4 (CUSTOM_SIMPLE_EDGE) +Reducer 7 <- Union 4 (CUSTOM_SIMPLE_EDGE) Stage-5 Stats Work{} @@ -2614,231 +2611,353 @@ Stage-5 Stage-4 Dependency Collection{} Stage-3 - Union 4 - <-Map 16 [CONTAINS] llap - File Output Operator [FS_75] - table:{"name:":"default.a"} - Select Operator [SEL_72] (rows=1677 width=10) - Output:["_col0","_col1"] - Map Join Operator [MAPJOIN_124] (rows=1677 width=10) - Conds:RS_69._col1=SEL_54._col0(Inner),Output:["_col0","_col3"] - <-Map 6 [BROADCAST_EDGE] llap - BROADCAST [RS_69] - PartitionCols:_col1 - Map Join Operator [MAPJOIN_123] (rows=27 width=7) - Conds:SEL_10._col0=RS_67._col0(Inner),Output:["_col0","_col1","_col3"] - <-Map 15 [BROADCAST_EDGE] llap - BROADCAST [RS_67] + Reducer 5 llap + File Output Operator [FS_6] + Group By Operator [GBY_4] (rows=1 width=880) + Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"] + <-Union 4 [CUSTOM_SIMPLE_EDGE] + <-Map 19 [CONTAINS] llap + File Output Operator [FS_75] + table:{"name:":"default.a"} + Select Operator [SEL_72] (rows=1677 width=10) + Output:["_col0","_col1"] + Map Join Operator [MAPJOIN_124] (rows=1677 width=10) + Conds:RS_69._col1=SEL_54._col0(Inner),Output:["_col0","_col3"] + <-Map 9 [BROADCAST_EDGE] llap + BROADCAST [RS_69] + PartitionCols:_col1 + Map Join Operator [MAPJOIN_123] (rows=27 width=7) + Conds:SEL_10._col0=RS_67._col0(Inner),Output:["_col0","_col1","_col3"] + <-Map 18 [BROADCAST_EDGE] llap + BROADCAST [RS_67] + PartitionCols:_col0 + Select Operator [SEL_51] (rows=25 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_114] (rows=25 width=7) + predicate:key is not null + TableScan [TS_49] (rows=25 width=7) + default@src1,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Select Operator [SEL_10] (rows=25 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_106] (rows=25 width=7) + predicate:(key is not null and value is not null) + TableScan [TS_8] (rows=25 width=7) 
+ default@src1,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Select Operator [SEL_54] (rows=25 width=7) + Output:["_col0"] + Filter Operator [FIL_115] (rows=25 width=7) + predicate:value is not null + TableScan [TS_52] (rows=25 width=7) + Output:["value"] + Reduce Output Operator [RS_3] + Group By Operator [GBY_2] (rows=1 width=880) + Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] + Select Operator [SEL_1] (rows=3550 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_72] + File Output Operator [FS_77] + table:{"name:":"default.b"} + Please refer to the previous Select Operator [SEL_72] + Reduce Output Operator [RS_3] + Group By Operator [GBY_2] (rows=1 width=880) + Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] + Select Operator [SEL_1] (rows=3550 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_72] + File Output Operator [FS_79] + table:{"name:":"default.c"} + Please refer to the previous Select Operator [SEL_72] + Reduce Output Operator [RS_3] + Group By Operator [GBY_2] (rows=1 width=880) + Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] + Select Operator [SEL_1] (rows=3550 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_72] + <-Map 20 [CONTAINS] llap + File Output Operator [FS_75] + table:{"name:":"default.a"} + Select Operator [SEL_72] (rows=1677 width=10) + Output:["_col0","_col1"] + Map Join Operator [MAPJOIN_124] (rows=1677 width=10) + Conds:RS_129._col1=SEL_57._col0(Inner),Output:["_col0","_col3"] + <-Map 9 [BROADCAST_EDGE] llap + BROADCAST [RS_129] + PartitionCols:_col1 + Please refer to the previous Map Join Operator [MAPJOIN_123] + <-Select Operator [SEL_57] (rows=500 width=10) + Output:["_col0"] + Filter Operator [FIL_116] (rows=500 width=10) + predicate:value is not null + TableScan [TS_55] (rows=500 width=10) + Output:["value"] + Reduce Output Operator [RS_3] + Group By Operator [GBY_2] (rows=1 width=880) + Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] + Select Operator [SEL_1] (rows=3550 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_72] + File Output Operator [FS_77] + table:{"name:":"default.b"} + Please refer to the previous Select Operator [SEL_72] + Reduce Output Operator [RS_3] + Group By Operator [GBY_2] (rows=1 width=880) + Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] + Select Operator [SEL_1] (rows=3550 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_72] + File Output Operator [FS_79] + table:{"name:":"default.c"} + Please refer to the previous Select Operator [SEL_72] + Reduce Output Operator [RS_3] + Group By Operator [GBY_2] (rows=1 width=880) + Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] + Select Operator [SEL_1] (rows=3550 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_72] + <-Map 21 [CONTAINS] llap + File Output Operator [FS_75] + table:{"name:":"default.a"} + Select Operator [SEL_72] (rows=1677 width=10) + Output:["_col0","_col1"] + Map Join Operator [MAPJOIN_124] (rows=1677 width=10) + Conds:RS_130._col1=SEL_61._col0(Inner),Output:["_col0","_col3"] + <-Map 9 [BROADCAST_EDGE] llap + BROADCAST [RS_130] + 
PartitionCols:_col1 + Please refer to the previous Map Join Operator [MAPJOIN_123] + <-Select Operator [SEL_61] (rows=500 width=10) + Output:["_col0"] + Filter Operator [FIL_117] (rows=500 width=10) + predicate:value is not null + TableScan [TS_59] (rows=500 width=10) + Output:["value"] + Reduce Output Operator [RS_3] + Group By Operator [GBY_2] (rows=1 width=880) + Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] + Select Operator [SEL_1] (rows=3550 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_72] + File Output Operator [FS_77] + table:{"name:":"default.b"} + Please refer to the previous Select Operator [SEL_72] + Reduce Output Operator [RS_3] + Group By Operator [GBY_2] (rows=1 width=880) + Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] + Select Operator [SEL_1] (rows=3550 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_72] + File Output Operator [FS_79] + table:{"name:":"default.c"} + Please refer to the previous Select Operator [SEL_72] + Reduce Output Operator [RS_3] + Group By Operator [GBY_2] (rows=1 width=880) + Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] + Select Operator [SEL_1] (rows=3550 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_72] + <-Map 22 [CONTAINS] llap + File Output Operator [FS_75] + table:{"name:":"default.a"} + Select Operator [SEL_72] (rows=1677 width=10) + Output:["_col0","_col1"] + Map Join Operator [MAPJOIN_124] (rows=1677 width=10) + Conds:RS_131._col1=SEL_64._col0(Inner),Output:["_col0","_col3"] + <-Map 9 [BROADCAST_EDGE] llap + BROADCAST [RS_131] + PartitionCols:_col1 + Please refer to the previous Map Join Operator [MAPJOIN_123] + <-Select Operator [SEL_64] (rows=500 width=10) + Output:["_col0"] + Filter Operator [FIL_118] (rows=500 width=10) + predicate:value is not null + TableScan [TS_62] (rows=500 width=10) + Output:["value"] + Reduce Output Operator [RS_3] + Group By Operator [GBY_2] (rows=1 width=880) + Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] + Select Operator [SEL_1] (rows=3550 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_72] + File Output Operator [FS_77] + table:{"name:":"default.b"} + Please refer to the previous Select Operator [SEL_72] + Reduce Output Operator [RS_3] + Group By Operator [GBY_2] (rows=1 width=880) + Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] + Select Operator [SEL_1] (rows=3550 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_72] + File Output Operator [FS_79] + table:{"name:":"default.c"} + Please refer to the previous Select Operator [SEL_72] + Reduce Output Operator [RS_3] + Group By Operator [GBY_2] (rows=1 width=880) + Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] + Select Operator [SEL_1] (rows=3550 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_72] + <-Reducer 12 [CONTAINS] llap + File Output Operator [FS_75] + table:{"name:":"default.a"} + Select Operator [SEL_44] (rows=1239 width=10) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_122] (rows=1239 width=10) + Conds:RS_41._col1=RS_42._col0(Inner),Output:["_col1","_col4"] + <-Map 17 [SIMPLE_EDGE] llap + SHUFFLE 
[RS_42] PartitionCols:_col0 - Select Operator [SEL_51] (rows=25 width=7) + Select Operator [SEL_37] (rows=500 width=10) Output:["_col0","_col1"] - Filter Operator [FIL_114] (rows=25 width=7) + Filter Operator [FIL_112] (rows=500 width=10) predicate:key is not null - TableScan [TS_49] (rows=25 width=7) - default@src1,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Select Operator [SEL_10] (rows=25 width=7) - Output:["_col0","_col1"] - Filter Operator [FIL_106] (rows=25 width=7) - predicate:(key is not null and value is not null) - TableScan [TS_8] (rows=25 width=7) - default@src1,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Select Operator [SEL_54] (rows=25 width=7) - Output:["_col0"] - Filter Operator [FIL_115] (rows=25 width=7) - predicate:value is not null - TableScan [TS_52] (rows=25 width=7) - Output:["value"] - File Output Operator [FS_77] - table:{"name:":"default.b"} - Please refer to the previous Select Operator [SEL_72] - File Output Operator [FS_79] - table:{"name:":"default.c"} - Please refer to the previous Select Operator [SEL_72] - <-Map 17 [CONTAINS] llap - File Output Operator [FS_75] - table:{"name:":"default.a"} - Select Operator [SEL_72] (rows=1677 width=10) - Output:["_col0","_col1"] - Map Join Operator [MAPJOIN_124] (rows=1677 width=10) - Conds:RS_129._col1=SEL_57._col0(Inner),Output:["_col0","_col3"] - <-Map 6 [BROADCAST_EDGE] llap - BROADCAST [RS_129] - PartitionCols:_col1 - Please refer to the previous Map Join Operator [MAPJOIN_123] - <-Select Operator [SEL_57] (rows=500 width=10) - Output:["_col0"] - Filter Operator [FIL_116] (rows=500 width=10) - predicate:value is not null - TableScan [TS_55] (rows=500 width=10) - Output:["value"] - File Output Operator [FS_77] - table:{"name:":"default.b"} - Please refer to the previous Select Operator [SEL_72] - File Output Operator [FS_79] - table:{"name:":"default.c"} - Please refer to the previous Select Operator [SEL_72] - <-Map 18 [CONTAINS] llap - File Output Operator [FS_75] - table:{"name:":"default.a"} - Select Operator [SEL_72] (rows=1677 width=10) - Output:["_col0","_col1"] - Map Join Operator [MAPJOIN_124] (rows=1677 width=10) - Conds:RS_130._col1=SEL_61._col0(Inner),Output:["_col0","_col3"] - <-Map 6 [BROADCAST_EDGE] llap - BROADCAST [RS_130] - PartitionCols:_col1 - Please refer to the previous Map Join Operator [MAPJOIN_123] - <-Select Operator [SEL_61] (rows=500 width=10) - Output:["_col0"] - Filter Operator [FIL_117] (rows=500 width=10) - predicate:value is not null - TableScan [TS_59] (rows=500 width=10) - Output:["value"] - File Output Operator [FS_77] - table:{"name:":"default.b"} - Please refer to the previous Select Operator [SEL_72] - File Output Operator [FS_79] - table:{"name:":"default.c"} - Please refer to the previous Select Operator [SEL_72] - <-Map 19 [CONTAINS] llap - File Output Operator [FS_75] - table:{"name:":"default.a"} - Select Operator [SEL_72] (rows=1677 width=10) - Output:["_col0","_col1"] - Map Join Operator [MAPJOIN_124] (rows=1677 width=10) - Conds:RS_131._col1=SEL_64._col0(Inner),Output:["_col0","_col3"] - <-Map 6 [BROADCAST_EDGE] llap - BROADCAST [RS_131] - PartitionCols:_col1 - Please refer to the previous Map Join Operator [MAPJOIN_123] - <-Select Operator [SEL_64] (rows=500 width=10) - Output:["_col0"] - Filter Operator [FIL_118] (rows=500 width=10) - predicate:value is not null - TableScan [TS_62] (rows=500 width=10) - Output:["value"] - File Output Operator [FS_77] - table:{"name:":"default.b"} - Please refer to the previous Select Operator [SEL_72] - File Output 
Operator [FS_79] - table:{"name:":"default.c"} - Please refer to the previous Select Operator [SEL_72] - <-Reducer 3 [CONTAINS] llap - File Output Operator [FS_75] - table:{"name:":"default.a"} - Select Operator [SEL_20] (rows=634 width=10) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_120] (rows=634 width=10) - Conds:Union 2._col1=RS_18._col0(Inner),Output:["_col1","_col4"] - <-Map 7 [SIMPLE_EDGE] llap - SHUFFLE [RS_18] - PartitionCols:_col0 - Select Operator [SEL_13] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_107] (rows=500 width=10) - predicate:key is not null - TableScan [TS_11] (rows=500 width=10) - default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Union 2 [SIMPLE_EDGE] - <-Map 1 [CONTAINS] llap - Reduce Output Operator [RS_17] - PartitionCols:_col1 - Map Join Operator [MAPJOIN_119] (rows=577 width=10) - Conds:SEL_2._col0=RS_15._col1(Inner),Output:["_col1"] - <-Map 6 [BROADCAST_EDGE] llap - BROADCAST [RS_15] - PartitionCols:_col1 - Please refer to the previous Select Operator [SEL_10] - <-Select Operator [SEL_2] (rows=25 width=7) - Output:["_col0"] - Filter Operator [FIL_104] (rows=25 width=7) - predicate:value is not null - TableScan [TS_0] (rows=25 width=7) - Output:["value"] - <-Map 5 [CONTAINS] llap - Reduce Output Operator [RS_17] - PartitionCols:_col1 - Map Join Operator [MAPJOIN_119] (rows=577 width=10) - Conds:SEL_5._col0=RS_125._col1(Inner),Output:["_col1"] - <-Map 6 [BROADCAST_EDGE] llap - BROADCAST [RS_125] - PartitionCols:_col1 - Please refer to the previous Select Operator [SEL_10] - <-Select Operator [SEL_5] (rows=500 width=10) - Output:["_col0"] - Filter Operator [FIL_105] (rows=500 width=10) - predicate:value is not null - TableScan [TS_3] (rows=500 width=10) - Output:["value"] - File Output Operator [FS_77] - table:{"name:":"default.b"} - Please refer to the previous Select Operator [SEL_20] - File Output Operator [FS_79] - table:{"name:":"default.c"} - Please refer to the previous Select Operator [SEL_20] - <-Reducer 9 [CONTAINS] llap - File Output Operator [FS_75] - table:{"name:":"default.a"} - Select Operator [SEL_44] (rows=1239 width=10) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_122] (rows=1239 width=10) - Conds:RS_41._col1=RS_42._col0(Inner),Output:["_col1","_col4"] - <-Map 14 [SIMPLE_EDGE] llap - SHUFFLE [RS_42] - PartitionCols:_col0 - Select Operator [SEL_37] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_112] (rows=500 width=10) - predicate:key is not null - TableScan [TS_35] (rows=500 width=10) - default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Reducer 8 [SIMPLE_EDGE] llap - SHUFFLE [RS_41] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_121] (rows=1127 width=10) - Conds:Union 11._col0=RS_39._col1(Inner),Output:["_col1"] - <-Map 7 [SIMPLE_EDGE] llap - SHUFFLE [RS_39] + TableScan [TS_35] (rows=500 width=10) + default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Reducer 11 [SIMPLE_EDGE] llap + SHUFFLE [RS_41] PartitionCols:_col1 - Select Operator [SEL_34] (rows=500 width=10) + Merge Join Operator [MERGEJOIN_121] (rows=1127 width=10) + Conds:Union 14._col0=RS_39._col1(Inner),Output:["_col1"] + <-Map 10 [SIMPLE_EDGE] llap + SHUFFLE [RS_39] + PartitionCols:_col1 + Select Operator [SEL_34] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_111] (rows=500 width=10) + predicate:(key is not null and value is not null) + TableScan [TS_11] (rows=500 width=10) + default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + 
<-Union 14 [SIMPLE_EDGE] + <-Map 13 [CONTAINS] llap + Reduce Output Operator [RS_38] + PartitionCols:_col0 + Select Operator [SEL_23] (rows=25 width=7) + Output:["_col0"] + Filter Operator [FIL_108] (rows=25 width=7) + predicate:value is not null + TableScan [TS_21] (rows=25 width=7) + Output:["value"] + <-Map 15 [CONTAINS] llap + Reduce Output Operator [RS_38] + PartitionCols:_col0 + Select Operator [SEL_26] (rows=500 width=10) + Output:["_col0"] + Filter Operator [FIL_109] (rows=500 width=10) + predicate:value is not null + TableScan [TS_24] (rows=500 width=10) + Output:["value"] + <-Map 16 [CONTAINS] llap + Reduce Output Operator [RS_38] + PartitionCols:_col0 + Select Operator [SEL_30] (rows=500 width=10) + Output:["_col0"] + Filter Operator [FIL_110] (rows=500 width=10) + predicate:value is not null + TableScan [TS_28] (rows=500 width=10) + Output:["value"] + Reduce Output Operator [RS_3] + Group By Operator [GBY_2] (rows=1 width=880) + Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] + Select Operator [SEL_1] (rows=3550 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_44] + File Output Operator [FS_77] + table:{"name:":"default.b"} + Please refer to the previous Select Operator [SEL_44] + Reduce Output Operator [RS_3] + Group By Operator [GBY_2] (rows=1 width=880) + Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] + Select Operator [SEL_1] (rows=3550 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_44] + File Output Operator [FS_79] + table:{"name:":"default.c"} + Please refer to the previous Select Operator [SEL_44] + Reduce Output Operator [RS_3] + Group By Operator [GBY_2] (rows=1 width=880) + Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] + Select Operator [SEL_1] (rows=3550 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_44] + <-Reducer 3 [CONTAINS] llap + File Output Operator [FS_75] + table:{"name:":"default.a"} + Select Operator [SEL_20] (rows=634 width=10) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_120] (rows=634 width=10) + Conds:Union 2._col1=RS_18._col0(Inner),Output:["_col1","_col4"] + <-Map 10 [SIMPLE_EDGE] llap + SHUFFLE [RS_18] + PartitionCols:_col0 + Select Operator [SEL_13] (rows=500 width=10) Output:["_col0","_col1"] - Filter Operator [FIL_111] (rows=500 width=10) - predicate:(key is not null and value is not null) + Filter Operator [FIL_107] (rows=500 width=10) + predicate:key is not null Please refer to the previous TableScan [TS_11] - <-Union 11 [SIMPLE_EDGE] - <-Map 10 [CONTAINS] llap - Reduce Output Operator [RS_38] - PartitionCols:_col0 - Select Operator [SEL_23] (rows=25 width=7) - Output:["_col0"] - Filter Operator [FIL_108] (rows=25 width=7) - predicate:value is not null - TableScan [TS_21] (rows=25 width=7) - Output:["value"] - <-Map 12 [CONTAINS] llap - Reduce Output Operator [RS_38] - PartitionCols:_col0 - Select Operator [SEL_26] (rows=500 width=10) - Output:["_col0"] - Filter Operator [FIL_109] (rows=500 width=10) - predicate:value is not null - TableScan [TS_24] (rows=500 width=10) - Output:["value"] - <-Map 13 [CONTAINS] llap - Reduce Output Operator [RS_38] - PartitionCols:_col0 - Select Operator [SEL_30] (rows=500 width=10) - Output:["_col0"] - Filter Operator [FIL_110] (rows=500 width=10) - predicate:value is not null - TableScan [TS_28] (rows=500 width=10) - 
Output:["value"] - File Output Operator [FS_77] - table:{"name:":"default.b"} - Please refer to the previous Select Operator [SEL_44] - File Output Operator [FS_79] - table:{"name:":"default.c"} - Please refer to the previous Select Operator [SEL_44] + <-Union 2 [SIMPLE_EDGE] + <-Map 1 [CONTAINS] llap + Reduce Output Operator [RS_17] + PartitionCols:_col1 + Map Join Operator [MAPJOIN_119] (rows=577 width=10) + Conds:SEL_2._col0=RS_15._col1(Inner),Output:["_col1"] + <-Map 9 [BROADCAST_EDGE] llap + BROADCAST [RS_15] + PartitionCols:_col1 + Please refer to the previous Select Operator [SEL_10] + <-Select Operator [SEL_2] (rows=25 width=7) + Output:["_col0"] + Filter Operator [FIL_104] (rows=25 width=7) + predicate:value is not null + TableScan [TS_0] (rows=25 width=7) + Output:["value"] + <-Map 8 [CONTAINS] llap + Reduce Output Operator [RS_17] + PartitionCols:_col1 + Map Join Operator [MAPJOIN_119] (rows=577 width=10) + Conds:SEL_5._col0=RS_125._col1(Inner),Output:["_col1"] + <-Map 9 [BROADCAST_EDGE] llap + BROADCAST [RS_125] + PartitionCols:_col1 + Please refer to the previous Select Operator [SEL_10] + <-Select Operator [SEL_5] (rows=500 width=10) + Output:["_col0"] + Filter Operator [FIL_105] (rows=500 width=10) + predicate:value is not null + TableScan [TS_3] (rows=500 width=10) + Output:["value"] + Reduce Output Operator [RS_3] + Group By Operator [GBY_2] (rows=1 width=880) + Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] + Select Operator [SEL_1] (rows=3550 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_20] + File Output Operator [FS_77] + table:{"name:":"default.b"} + Please refer to the previous Select Operator [SEL_20] + Reduce Output Operator [RS_3] + Group By Operator [GBY_2] (rows=1 width=880) + Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] + Select Operator [SEL_1] (rows=3550 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_20] + File Output Operator [FS_79] + table:{"name:":"default.c"} + Please refer to the previous Select Operator [SEL_20] + Reduce Output Operator [RS_3] + Group By Operator [GBY_2] (rows=1 width=880) + Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] + Select Operator [SEL_1] (rows=3550 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_20] + Reducer 6 llap + File Output Operator [FS_6] + Group By Operator [GBY_4] (rows=1 width=880) + Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"] + <- Please refer to the previous Union 4 [CUSTOM_SIMPLE_EDGE] + Reducer 7 llap + File Output Operator [FS_6] + Group By Operator [GBY_4] (rows=1 width=880) + Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"] + <- Please refer to the previous Union 4 [CUSTOM_SIMPLE_EDGE] Stage-6 Stats Work{} Stage-1 @@ -2888,26 +3007,29 @@ Plan optimized by CBO. 
Vertex dependency in root stage Map 1 <- Union 2 (CONTAINS) -Map 10 <- Map 22 (BROADCAST_EDGE) -Map 14 <- Union 15 (CONTAINS) -Map 19 <- Union 15 (CONTAINS) -Map 20 <- Union 17 (CONTAINS) -Map 23 <- Union 24 (CONTAINS) -Map 30 <- Union 24 (CONTAINS) -Map 31 <- Union 26 (CONTAINS) -Map 32 <- Union 28 (CONTAINS) -Map 9 <- Union 2 (CONTAINS) -Reducer 12 <- Map 11 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) -Reducer 13 <- Map 21 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE), Union 5 (CONTAINS) -Reducer 16 <- Union 15 (SIMPLE_EDGE), Union 17 (CONTAINS) -Reducer 18 <- Union 17 (SIMPLE_EDGE) -Reducer 25 <- Union 24 (SIMPLE_EDGE), Union 26 (CONTAINS) -Reducer 27 <- Union 26 (SIMPLE_EDGE), Union 28 (CONTAINS) -Reducer 29 <- Map 10 (BROADCAST_EDGE), Union 28 (SIMPLE_EDGE), Union 7 (CONTAINS) -Reducer 3 <- Map 10 (BROADCAST_EDGE), Union 2 (SIMPLE_EDGE) -Reducer 4 <- Map 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) +Map 12 <- Union 2 (CONTAINS) +Map 13 <- Map 25 (BROADCAST_EDGE) +Map 17 <- Union 18 (CONTAINS) +Map 22 <- Union 18 (CONTAINS) +Map 23 <- Union 20 (CONTAINS) +Map 26 <- Union 27 (CONTAINS) +Map 33 <- Union 27 (CONTAINS) +Map 34 <- Union 29 (CONTAINS) +Map 35 <- Union 31 (CONTAINS) +Reducer 10 <- Reducer 8 (CUSTOM_SIMPLE_EDGE) +Reducer 11 <- Reducer 8 (CUSTOM_SIMPLE_EDGE) +Reducer 15 <- Map 14 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE) +Reducer 16 <- Map 24 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 19 <- Union 18 (SIMPLE_EDGE), Union 20 (CONTAINS) +Reducer 21 <- Union 20 (SIMPLE_EDGE) +Reducer 28 <- Union 27 (SIMPLE_EDGE), Union 29 (CONTAINS) +Reducer 3 <- Map 13 (BROADCAST_EDGE), Union 2 (SIMPLE_EDGE) +Reducer 30 <- Union 29 (SIMPLE_EDGE), Union 31 (CONTAINS) +Reducer 32 <- Map 13 (BROADCAST_EDGE), Union 31 (SIMPLE_EDGE), Union 7 (CONTAINS) +Reducer 4 <- Map 14 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) Reducer 6 <- Union 5 (SIMPLE_EDGE), Union 7 (CONTAINS) Reducer 8 <- Union 7 (SIMPLE_EDGE) +Reducer 9 <- Reducer 8 (CUSTOM_SIMPLE_EDGE) Stage-5 Stats Work{} @@ -2917,260 +3039,283 @@ Stage-5 Stage-4 Dependency Collection{} Stage-3 - Reducer 8 llap - File Output Operator [FS_123] - table:{"name:":"default.a"} - Group By Operator [GBY_120] (rows=530 width=10) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 7 [SIMPLE_EDGE] - <-Reducer 29 [CONTAINS] llap - Reduce Output Operator [RS_119] - PartitionCols:_col0, _col1 - Group By Operator [GBY_118] (rows=1061 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_114] (rows=484 width=10) - Output:["_col0","_col1"] - Map Join Operator [MAPJOIN_172] (rows=484 width=10) - Conds:RS_111._col1=SEL_107._col1(Inner),Output:["_col0","_col3"] - <-Map 10 [BROADCAST_EDGE] llap - BROADCAST [RS_111] - PartitionCols:_col1 - Map Join Operator [MAPJOIN_171] (rows=27 width=7) - Conds:SEL_15._col0=RS_109._col0(Inner),Output:["_col0","_col1","_col3"] - <-Map 22 [BROADCAST_EDGE] llap - BROADCAST [RS_109] - PartitionCols:_col0 - Select Operator [SEL_74] (rows=25 width=7) - Output:["_col0","_col1"] - Filter Operator [FIL_162] (rows=25 width=7) - predicate:key is not null - TableScan [TS_72] (rows=25 width=7) - default@src1,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Select Operator [SEL_15] (rows=25 width=7) + Reducer 10 llap + File Output Operator [FS_6] + Group By Operator [GBY_4] (rows=1 width=880) + Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"] + <-Reducer 8 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_3] + Group By 
Operator [GBY_2] (rows=1 width=880) + Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] + Select Operator [SEL_1] (rows=530 width=10) + Output:["key","value"] + Group By Operator [GBY_120] (rows=530 width=10) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 7 [SIMPLE_EDGE] + <-Reducer 32 [CONTAINS] llap + Reduce Output Operator [RS_119] + PartitionCols:_col0, _col1 + Group By Operator [GBY_118] (rows=1061 width=10) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_114] (rows=484 width=10) Output:["_col0","_col1"] - Filter Operator [FIL_154] (rows=25 width=7) - predicate:(key is not null and value is not null) - TableScan [TS_13] (rows=25 width=7) - default@src1,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Select Operator [SEL_107] (rows=440 width=10) - Output:["_col1"] - Group By Operator [GBY_106] (rows=440 width=10) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 28 [SIMPLE_EDGE] - <-Map 32 [CONTAINS] llap - Reduce Output Operator [RS_105] - PartitionCols:_col0, _col1 - Group By Operator [GBY_104] (rows=881 width=10) - Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_100] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_166] (rows=500 width=10) - predicate:value is not null - TableScan [TS_98] (rows=500 width=10) - Output:["key","value"] - <-Reducer 27 [CONTAINS] llap - Reduce Output Operator [RS_105] - PartitionCols:_col0, _col1 - Group By Operator [GBY_104] (rows=881 width=10) - Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_97] (rows=381 width=10) - Output:["_col0","_col1"] - Group By Operator [GBY_96] (rows=381 width=10) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 26 [SIMPLE_EDGE] - <-Map 31 [CONTAINS] llap - Reduce Output Operator [RS_95] - PartitionCols:_col0, _col1 - Group By Operator [GBY_94] (rows=762 width=10) - Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_90] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_165] (rows=500 width=10) - predicate:value is not null - TableScan [TS_88] (rows=500 width=10) - Output:["key","value"] - <-Reducer 25 [CONTAINS] llap - Reduce Output Operator [RS_95] - PartitionCols:_col0, _col1 - Group By Operator [GBY_94] (rows=762 width=10) - Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_87] (rows=262 width=10) - Output:["_col0","_col1"] - Group By Operator [GBY_86] (rows=262 width=10) + Map Join Operator [MAPJOIN_172] (rows=484 width=10) + Conds:RS_111._col1=SEL_107._col1(Inner),Output:["_col0","_col3"] + <-Map 13 [BROADCAST_EDGE] llap + BROADCAST [RS_111] + PartitionCols:_col1 + Map Join Operator [MAPJOIN_171] (rows=27 width=7) + Conds:SEL_15._col0=RS_109._col0(Inner),Output:["_col0","_col1","_col3"] + <-Map 25 [BROADCAST_EDGE] llap + BROADCAST [RS_109] + PartitionCols:_col0 + Select Operator [SEL_74] (rows=25 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_162] (rows=25 width=7) + predicate:key is not null + TableScan [TS_72] (rows=25 width=7) + default@src1,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Select Operator [SEL_15] (rows=25 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_154] (rows=25 width=7) + predicate:(key is not null and value is not null) + TableScan [TS_13] (rows=25 width=7) + default@src1,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Select Operator [SEL_107] (rows=440 width=10) + Output:["_col1"] + Group By Operator [GBY_106] (rows=440 width=10) + 
Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 31 [SIMPLE_EDGE] + <-Map 35 [CONTAINS] llap + Reduce Output Operator [RS_105] + PartitionCols:_col0, _col1 + Group By Operator [GBY_104] (rows=881 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_100] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_166] (rows=500 width=10) + predicate:value is not null + TableScan [TS_98] (rows=500 width=10) + Output:["key","value"] + <-Reducer 30 [CONTAINS] llap + Reduce Output Operator [RS_105] + PartitionCols:_col0, _col1 + Group By Operator [GBY_104] (rows=881 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_97] (rows=381 width=10) + Output:["_col0","_col1"] + Group By Operator [GBY_96] (rows=381 width=10) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 29 [SIMPLE_EDGE] + <-Map 34 [CONTAINS] llap + Reduce Output Operator [RS_95] + PartitionCols:_col0, _col1 + Group By Operator [GBY_94] (rows=762 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_90] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_165] (rows=500 width=10) + predicate:value is not null + TableScan [TS_88] (rows=500 width=10) + Output:["key","value"] + <-Reducer 28 [CONTAINS] llap + Reduce Output Operator [RS_95] + PartitionCols:_col0, _col1 + Group By Operator [GBY_94] (rows=762 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_87] (rows=262 width=10) + Output:["_col0","_col1"] + Group By Operator [GBY_86] (rows=262 width=10) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 27 [SIMPLE_EDGE] + <-Map 26 [CONTAINS] llap + Reduce Output Operator [RS_85] + PartitionCols:_col0, _col1 + Group By Operator [GBY_84] (rows=525 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_77] (rows=25 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_163] (rows=25 width=7) + predicate:value is not null + TableScan [TS_75] (rows=25 width=7) + Output:["key","value"] + <-Map 33 [CONTAINS] llap + Reduce Output Operator [RS_85] + PartitionCols:_col0, _col1 + Group By Operator [GBY_84] (rows=525 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_80] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_164] (rows=500 width=10) + predicate:value is not null + TableScan [TS_78] (rows=500 width=10) + Output:["key","value"] + <-Reducer 6 [CONTAINS] llap + Reduce Output Operator [RS_119] + PartitionCols:_col0, _col1 + Group By Operator [GBY_118] (rows=1061 width=10) + Output:["_col0","_col1"],keys:_col0, _col1 + Group By Operator [GBY_67] (rows=577 width=10) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 5 [SIMPLE_EDGE] + <-Reducer 16 [CONTAINS] llap + Reduce Output Operator [RS_66] + PartitionCols:_col0, _col1 + Group By Operator [GBY_65] (rows=1155 width=10) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_61] (rows=605 width=10) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_170] (rows=605 width=10) + Conds:RS_58._col2=RS_59._col0(Inner),Output:["_col2","_col5"] + <-Map 24 [SIMPLE_EDGE] llap + SHUFFLE [RS_59] + PartitionCols:_col0 + Select Operator [SEL_54] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_160] (rows=500 width=10) + predicate:key is not null + TableScan [TS_52] (rows=500 width=10) + default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Reducer 15 [SIMPLE_EDGE] llap + SHUFFLE [RS_58] + PartitionCols:_col2 + 
Merge Join Operator [MERGEJOIN_169] (rows=550 width=10) + Conds:RS_55._col1=RS_56._col1(Inner),Output:["_col2"] + <-Map 14 [SIMPLE_EDGE] llap + SHUFFLE [RS_56] + PartitionCols:_col1 + Select Operator [SEL_51] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_159] (rows=500 width=10) + predicate:(key is not null and value is not null) + TableScan [TS_16] (rows=500 width=10) + default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Reducer 21 [SIMPLE_EDGE] llap + SHUFFLE [RS_55] + PartitionCols:_col1 + Select Operator [SEL_48] (rows=381 width=10) + Output:["_col1"] + Group By Operator [GBY_47] (rows=381 width=10) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 20 [SIMPLE_EDGE] + <-Map 23 [CONTAINS] llap + Reduce Output Operator [RS_46] + PartitionCols:_col0, _col1 + Group By Operator [GBY_45] (rows=762 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_41] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_158] (rows=500 width=10) + predicate:value is not null + TableScan [TS_39] (rows=500 width=10) + Output:["key","value"] + <-Reducer 19 [CONTAINS] llap + Reduce Output Operator [RS_46] + PartitionCols:_col0, _col1 + Group By Operator [GBY_45] (rows=762 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_38] (rows=262 width=10) + Output:["_col0","_col1"] + Group By Operator [GBY_37] (rows=262 width=10) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 18 [SIMPLE_EDGE] + <-Map 17 [CONTAINS] llap + Reduce Output Operator [RS_36] + PartitionCols:_col0, _col1 + Group By Operator [GBY_35] (rows=525 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_28] (rows=25 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_156] (rows=25 width=7) + predicate:value is not null + TableScan [TS_26] (rows=25 width=7) + Output:["key","value"] + <-Map 22 [CONTAINS] llap + Reduce Output Operator [RS_36] + PartitionCols:_col0, _col1 + Group By Operator [GBY_35] (rows=525 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_31] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_157] (rows=500 width=10) + predicate:value is not null + TableScan [TS_29] (rows=500 width=10) + Output:["key","value"] + <-Reducer 4 [CONTAINS] llap + Reduce Output Operator [RS_66] + PartitionCols:_col0, _col1 + Group By Operator [GBY_65] (rows=1155 width=10) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_25] (rows=550 width=10) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_168] (rows=550 width=10) + Conds:RS_22._col2=RS_23._col0(Inner),Output:["_col2","_col5"] + <-Map 14 [SIMPLE_EDGE] llap + SHUFFLE [RS_23] + PartitionCols:_col0 + Select Operator [SEL_18] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_155] (rows=500 width=10) + predicate:key is not null + Please refer to the previous TableScan [TS_16] + <-Reducer 3 [SIMPLE_EDGE] llap + SHUFFLE [RS_22] + PartitionCols:_col2 + Map Join Operator [MAPJOIN_167] (rows=288 width=10) + Conds:SEL_12._col1=RS_20._col1(Inner),Output:["_col2"] + <-Map 13 [BROADCAST_EDGE] llap + BROADCAST [RS_20] + PartitionCols:_col1 + Please refer to the previous Select Operator [SEL_15] + <-Select Operator [SEL_12] (rows=262 width=10) + Output:["_col1"] + Group By Operator [GBY_11] (rows=262 width=10) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 24 [SIMPLE_EDGE] - <-Map 23 [CONTAINS] llap - Reduce Output Operator [RS_85] + <-Union 2 
[SIMPLE_EDGE] + <-Map 1 [CONTAINS] llap + Reduce Output Operator [RS_10] PartitionCols:_col0, _col1 - Group By Operator [GBY_84] (rows=525 width=10) + Group By Operator [GBY_9] (rows=525 width=10) Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_77] (rows=25 width=7) + Select Operator [SEL_2] (rows=25 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_163] (rows=25 width=7) + Filter Operator [FIL_152] (rows=25 width=7) predicate:value is not null - TableScan [TS_75] (rows=25 width=7) + TableScan [TS_0] (rows=25 width=7) Output:["key","value"] - <-Map 30 [CONTAINS] llap - Reduce Output Operator [RS_85] + <-Map 12 [CONTAINS] llap + Reduce Output Operator [RS_10] PartitionCols:_col0, _col1 - Group By Operator [GBY_84] (rows=525 width=10) + Group By Operator [GBY_9] (rows=525 width=10) Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_80] (rows=500 width=10) + Select Operator [SEL_5] (rows=500 width=10) Output:["_col0","_col1"] - Filter Operator [FIL_164] (rows=500 width=10) + Filter Operator [FIL_153] (rows=500 width=10) predicate:value is not null - TableScan [TS_78] (rows=500 width=10) + TableScan [TS_3] (rows=500 width=10) Output:["key","value"] - <-Reducer 6 [CONTAINS] llap - Reduce Output Operator [RS_119] - PartitionCols:_col0, _col1 - Group By Operator [GBY_118] (rows=1061 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Group By Operator [GBY_67] (rows=577 width=10) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 5 [SIMPLE_EDGE] - <-Reducer 13 [CONTAINS] llap - Reduce Output Operator [RS_66] - PartitionCols:_col0, _col1 - Group By Operator [GBY_65] (rows=1155 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_61] (rows=605 width=10) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_170] (rows=605 width=10) - Conds:RS_58._col2=RS_59._col0(Inner),Output:["_col2","_col5"] - <-Map 21 [SIMPLE_EDGE] llap - SHUFFLE [RS_59] - PartitionCols:_col0 - Select Operator [SEL_54] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_160] (rows=500 width=10) - predicate:key is not null - TableScan [TS_52] (rows=500 width=10) - default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Reducer 12 [SIMPLE_EDGE] llap - SHUFFLE [RS_58] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_169] (rows=550 width=10) - Conds:RS_55._col1=RS_56._col1(Inner),Output:["_col2"] - <-Map 11 [SIMPLE_EDGE] llap - SHUFFLE [RS_56] - PartitionCols:_col1 - Select Operator [SEL_51] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_159] (rows=500 width=10) - predicate:(key is not null and value is not null) - TableScan [TS_16] (rows=500 width=10) - default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Reducer 18 [SIMPLE_EDGE] llap - SHUFFLE [RS_55] - PartitionCols:_col1 - Select Operator [SEL_48] (rows=381 width=10) - Output:["_col1"] - Group By Operator [GBY_47] (rows=381 width=10) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 17 [SIMPLE_EDGE] - <-Map 20 [CONTAINS] llap - Reduce Output Operator [RS_46] - PartitionCols:_col0, _col1 - Group By Operator [GBY_45] (rows=762 width=10) - Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_41] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_158] (rows=500 width=10) - predicate:value is not null - TableScan [TS_39] (rows=500 width=10) - Output:["key","value"] - <-Reducer 16 [CONTAINS] llap - Reduce Output Operator [RS_46] - PartitionCols:_col0, _col1 - Group By Operator [GBY_45] 
(rows=762 width=10) - Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_38] (rows=262 width=10) - Output:["_col0","_col1"] - Group By Operator [GBY_37] (rows=262 width=10) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 15 [SIMPLE_EDGE] - <-Map 14 [CONTAINS] llap - Reduce Output Operator [RS_36] - PartitionCols:_col0, _col1 - Group By Operator [GBY_35] (rows=525 width=10) - Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_28] (rows=25 width=7) - Output:["_col0","_col1"] - Filter Operator [FIL_156] (rows=25 width=7) - predicate:value is not null - TableScan [TS_26] (rows=25 width=7) - Output:["key","value"] - <-Map 19 [CONTAINS] llap - Reduce Output Operator [RS_36] - PartitionCols:_col0, _col1 - Group By Operator [GBY_35] (rows=525 width=10) - Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_31] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_157] (rows=500 width=10) - predicate:value is not null - TableScan [TS_29] (rows=500 width=10) - Output:["key","value"] - <-Reducer 4 [CONTAINS] llap - Reduce Output Operator [RS_66] - PartitionCols:_col0, _col1 - Group By Operator [GBY_65] (rows=1155 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_25] (rows=550 width=10) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_168] (rows=550 width=10) - Conds:RS_22._col2=RS_23._col0(Inner),Output:["_col2","_col5"] - <-Map 11 [SIMPLE_EDGE] llap - SHUFFLE [RS_23] - PartitionCols:_col0 - Select Operator [SEL_18] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_155] (rows=500 width=10) - predicate:key is not null - Please refer to the previous TableScan [TS_16] - <-Reducer 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_22] - PartitionCols:_col2 - Map Join Operator [MAPJOIN_167] (rows=288 width=10) - Conds:SEL_12._col1=RS_20._col1(Inner),Output:["_col2"] - <-Map 10 [BROADCAST_EDGE] llap - BROADCAST [RS_20] - PartitionCols:_col1 - Please refer to the previous Select Operator [SEL_15] - <-Select Operator [SEL_12] (rows=262 width=10) - Output:["_col1"] - Group By Operator [GBY_11] (rows=262 width=10) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 2 [SIMPLE_EDGE] - <-Map 1 [CONTAINS] llap - Reduce Output Operator [RS_10] - PartitionCols:_col0, _col1 - Group By Operator [GBY_9] (rows=525 width=10) - Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_2] (rows=25 width=7) - Output:["_col0","_col1"] - Filter Operator [FIL_152] (rows=25 width=7) - predicate:value is not null - TableScan [TS_0] (rows=25 width=7) - Output:["key","value"] - <-Map 9 [CONTAINS] llap - Reduce Output Operator [RS_10] - PartitionCols:_col0, _col1 - Group By Operator [GBY_9] (rows=525 width=10) - Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_5] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_153] (rows=500 width=10) - predicate:value is not null - TableScan [TS_3] (rows=500 width=10) - Output:["key","value"] - File Output Operator [FS_125] - table:{"name:":"default.b"} - Please refer to the previous Group By Operator [GBY_120] - File Output Operator [FS_127] - table:{"name:":"default.c"} - Please refer to the previous Group By Operator [GBY_120] + Reducer 11 llap + File Output Operator [FS_6] + Group By Operator [GBY_4] (rows=1 width=880) + Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"] + <-Reducer 8 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_3] + Group By Operator [GBY_2] 
(rows=1 width=880) + Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] + Select Operator [SEL_1] (rows=530 width=10) + Output:["key","value"] + Please refer to the previous Group By Operator [GBY_120] + Reducer 9 llap + File Output Operator [FS_6] + Group By Operator [GBY_4] (rows=1 width=880) + Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"] + <-Reducer 8 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_3] + Group By Operator [GBY_2] (rows=1 width=880) + Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] + Select Operator [SEL_1] (rows=530 width=10) + Output:["key","value"] + Please refer to the previous Group By Operator [GBY_120] Stage-6 Stats Work{} Stage-1 @@ -3217,10 +3362,12 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Map 6 <- Union 3 (CONTAINS) +Map 8 <- Union 3 (CONTAINS) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) Reducer 4 <- Union 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) +Reducer 7 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) Stage-4 Stats Work{} @@ -3230,53 +3377,71 @@ Stage-4 Stage-3 Dependency Collection{} Stage-2 - Reducer 5 llap - File Output Operator [FS_21] - table:{"name:":"default.dest1"} - Select Operator [SEL_19] (rows=1 width=272) - Output:["_col0","_col1"] - Group By Operator [GBY_18] (rows=1 width=96) - Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 - <-Reducer 4 [SIMPLE_EDGE] llap - SHUFFLE [RS_17] - PartitionCols:_col0 - Group By Operator [GBY_16] (rows=1 width=280) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) - Group By Operator [GBY_13] (rows=1 width=272) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 3 [SIMPLE_EDGE] - <-Map 6 [CONTAINS] llap - Reduce Output Operator [RS_12] - PartitionCols:_col0, _col1 - Group By Operator [GBY_11] (rows=1 width=272) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_7] (rows=500 width=10) - Output:["_col0","_col1"] - TableScan [TS_6] (rows=500 width=10) - Output:["key","value"] - <-Reducer 2 [CONTAINS] llap - Reduce Output Operator [RS_12] - PartitionCols:_col0, _col1 - Group By Operator [GBY_11] (rows=1 width=272) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_5] (rows=1 width=272) - Output:["_col0","_col1"] - Group By Operator [GBY_4] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_3] - Group By Operator [GBY_2] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_1] (rows=500 width=10) - TableScan [TS_0] (rows=500 width=10) - default@src,s1,Tbl:COMPLETE,Col:COMPLETE - File Output Operator [FS_28] - table:{"name:":"default.dest2"} - Select Operator [SEL_26] (rows=1 width=456) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_25] (rows=1 width=464) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1 - Please refer to the previous Group By Operator [GBY_13] + Reducer 6 llap + File Output Operator [FS_6] + Group By Operator [GBY_4] (rows=1 width=880) + Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"] + <-Reducer 5 [CUSTOM_SIMPLE_EDGE] llap + File Output Operator [FS_21] + 
table:{"name:":"default.dest1"} + Select Operator [SEL_19] (rows=1 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_18] (rows=1 width=96) + Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 + <-Reducer 4 [SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_17] + PartitionCols:_col0 + Group By Operator [GBY_16] (rows=1 width=280) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) + Group By Operator [GBY_13] (rows=1 width=272) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 3 [SIMPLE_EDGE] + <-Map 8 [CONTAINS] llap + Reduce Output Operator [RS_12] + PartitionCols:_col0, _col1 + Group By Operator [GBY_11] (rows=1 width=272) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_7] (rows=500 width=10) + Output:["_col0","_col1"] + TableScan [TS_6] (rows=500 width=10) + Output:["key","value"] + <-Reducer 2 [CONTAINS] llap + Reduce Output Operator [RS_12] + PartitionCols:_col0, _col1 + Group By Operator [GBY_11] (rows=1 width=272) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_5] (rows=1 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_4] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_3] + Group By Operator [GBY_2] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Select Operator [SEL_1] (rows=500 width=10) + TableScan [TS_0] (rows=500 width=10) + default@src,s1,Tbl:COMPLETE,Col:COMPLETE + PARTITION_ONLY_SHUFFLE [RS_3] + Group By Operator [GBY_2] (rows=1 width=880) + Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] + Select Operator [SEL_1] (rows=1 width=272) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_19] + Reducer 7 llap + File Output Operator [FS_6] + Group By Operator [GBY_4] (rows=1 width=1320) + Output:["_col0","_col1","_col2"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)","compute_stats(VALUE._col2)"] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_3] + Group By Operator [GBY_2] (rows=1 width=1320) + Output:["_col0","_col1","_col2"],aggregations:["compute_stats(key, 'hll')","compute_stats(val1, 'hll')","compute_stats(val2, 'hll')"] + Select Operator [SEL_1] (rows=1 width=456) + Output:["key","val1","val2"] + Select Operator [SEL_26] (rows=1 width=456) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_25] (rows=1 width=464) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1 + Please refer to the previous Group By Operator [GBY_13] Stage-5 Stats Work{} Stage-1 @@ -3390,11 +3555,13 @@ POSTHOOK: type: QUERY Plan optimized by CBO. 
Vertex dependency in root stage -Map 6 <- Union 3 (CONTAINS) -Map 7 <- Union 3 (CONTAINS) +Map 8 <- Union 3 (CONTAINS) +Map 9 <- Union 3 (CONTAINS) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) Reducer 4 <- Union 3 (SIMPLE_EDGE) -Reducer 5 <- Union 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) +Reducer 6 <- Union 3 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) Stage-4 Stats Work{} @@ -3404,71 +3571,91 @@ Stage-4 Stage-3 Dependency Collection{} Stage-2 - Reducer 4 llap - File Output Operator [FS_18] - table:{"name:":"default.dest1"} - Select Operator [SEL_16] (rows=1 width=272) - Output:["_col0","_col1"] - Group By Operator [GBY_15] (rows=1 width=96) - Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 - <-Union 3 [SIMPLE_EDGE] - <-Map 6 [CONTAINS] llap - Reduce Output Operator [RS_14] - PartitionCols:_col0 - Group By Operator [GBY_13] (rows=1 width=280) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) - Select Operator [SEL_7] (rows=500 width=10) - Output:["_col0","_col1"] - TableScan [TS_6] (rows=500 width=10) - Output:["key","value"] - Reduce Output Operator [RS_21] - PartitionCols:_col0, _col1 - Group By Operator [GBY_20] (rows=1 width=464) - Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) - Please refer to the previous Select Operator [SEL_7] - <-Map 7 [CONTAINS] llap - Reduce Output Operator [RS_14] - PartitionCols:_col0 - Group By Operator [GBY_13] (rows=1 width=280) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) - Select Operator [SEL_10] (rows=500 width=10) - Output:["_col0","_col1"] - TableScan [TS_9] (rows=500 width=10) - Output:["key","value"] - Reduce Output Operator [RS_21] - PartitionCols:_col0, _col1 - Group By Operator [GBY_20] (rows=1 width=464) - Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) - Please refer to the previous Select Operator [SEL_10] - <-Reducer 2 [CONTAINS] llap - Reduce Output Operator [RS_14] - PartitionCols:_col0 - Group By Operator [GBY_13] (rows=1 width=280) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) - Select Operator [SEL_5] (rows=1 width=272) - Output:["_col0","_col1"] - Group By Operator [GBY_4] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_3] - Group By Operator [GBY_2] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_1] (rows=500 width=10) - TableScan [TS_0] (rows=500 width=10) - default@src,s1,Tbl:COMPLETE,Col:COMPLETE - Reduce Output Operator [RS_21] - PartitionCols:_col0, _col1 - Group By Operator [GBY_20] (rows=1 width=464) - Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) - Please refer to the previous Select Operator [SEL_5] Reducer 5 llap - File Output Operator [FS_25] - table:{"name:":"default.dest2"} - Select Operator [SEL_23] (rows=1 width=456) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_22] (rows=1 width=280) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT KEY._col2:0._col0)"],keys:KEY._col0, KEY._col1 - <- Please refer to the previous Union 3 [SIMPLE_EDGE] + File 
Output Operator [FS_6] + Group By Operator [GBY_4] (rows=1 width=880) + Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] llap + File Output Operator [FS_18] + table:{"name:":"default.dest1"} + Select Operator [SEL_16] (rows=1 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_15] (rows=1 width=96) + Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 + <-Union 3 [SIMPLE_EDGE] + <-Map 8 [CONTAINS] llap + Reduce Output Operator [RS_14] + PartitionCols:_col0 + Group By Operator [GBY_13] (rows=1 width=280) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) + Select Operator [SEL_7] (rows=500 width=10) + Output:["_col0","_col1"] + TableScan [TS_6] (rows=500 width=10) + Output:["key","value"] + Reduce Output Operator [RS_21] + PartitionCols:_col0, _col1 + Group By Operator [GBY_20] (rows=1 width=464) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) + Please refer to the previous Select Operator [SEL_7] + <-Map 9 [CONTAINS] llap + Reduce Output Operator [RS_14] + PartitionCols:_col0 + Group By Operator [GBY_13] (rows=1 width=280) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) + Select Operator [SEL_10] (rows=500 width=10) + Output:["_col0","_col1"] + TableScan [TS_9] (rows=500 width=10) + Output:["key","value"] + Reduce Output Operator [RS_21] + PartitionCols:_col0, _col1 + Group By Operator [GBY_20] (rows=1 width=464) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) + Please refer to the previous Select Operator [SEL_10] + <-Reducer 2 [CONTAINS] llap + Reduce Output Operator [RS_14] + PartitionCols:_col0 + Group By Operator [GBY_13] (rows=1 width=280) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) + Select Operator [SEL_5] (rows=1 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_4] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_3] + Group By Operator [GBY_2] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Select Operator [SEL_1] (rows=500 width=10) + TableScan [TS_0] (rows=500 width=10) + default@src,s1,Tbl:COMPLETE,Col:COMPLETE + Reduce Output Operator [RS_21] + PartitionCols:_col0, _col1 + Group By Operator [GBY_20] (rows=1 width=464) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) + Please refer to the previous Select Operator [SEL_5] + PARTITION_ONLY_SHUFFLE [RS_3] + Group By Operator [GBY_2] (rows=1 width=880) + Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] + Select Operator [SEL_1] (rows=1 width=272) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_16] + Reducer 7 llap + File Output Operator [FS_6] + Group By Operator [GBY_4] (rows=1 width=1320) + Output:["_col0","_col1","_col2"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)","compute_stats(VALUE._col2)"] + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] llap + File Output Operator [FS_25] + table:{"name:":"default.dest2"} + Select Operator [SEL_23] (rows=1 width=456) + 
Output:["_col0","_col1","_col2"] + Group By Operator [GBY_22] (rows=1 width=280) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT KEY._col2:0._col0)"],keys:KEY._col0, KEY._col1 + <- Please refer to the previous Union 3 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_3] + Group By Operator [GBY_2] (rows=1 width=1320) + Output:["_col0","_col1","_col2"],aggregations:["compute_stats(key, 'hll')","compute_stats(val1, 'hll')","compute_stats(val2, 'hll')"] + Select Operator [SEL_1] (rows=1 width=456) + Output:["key","val1","val2"] + Please refer to the previous Select Operator [SEL_23] Stage-5 Stats Work{} Stage-1 @@ -3499,10 +3686,12 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Map 6 <- Union 3 (CONTAINS) +Map 8 <- Union 3 (CONTAINS) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) Reducer 4 <- Union 3 (SIMPLE_EDGE) -Reducer 5 <- Union 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) +Reducer 6 <- Union 3 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) Stage-4 Stats Work{} @@ -3512,57 +3701,77 @@ Stage-4 Stage-3 Dependency Collection{} Stage-2 - Reducer 4 llap - File Output Operator [FS_16] - table:{"name:":"default.dest1"} - Select Operator [SEL_14] (rows=1 width=272) - Output:["_col0","_col1"] - Group By Operator [GBY_13] (rows=1 width=96) - Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 - <-Union 3 [SIMPLE_EDGE] - <-Map 6 [CONTAINS] llap - Reduce Output Operator [RS_12] - PartitionCols:_col0 - Group By Operator [GBY_11] (rows=1 width=280) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) - Select Operator [SEL_7] (rows=500 width=10) - Output:["_col0","_col1"] - TableScan [TS_6] (rows=500 width=10) - Output:["key","value"] - Reduce Output Operator [RS_19] - PartitionCols:_col0, _col1 - Group By Operator [GBY_18] (rows=1 width=464) - Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) - Please refer to the previous Select Operator [SEL_7] - <-Reducer 2 [CONTAINS] llap - Reduce Output Operator [RS_12] - PartitionCols:_col0 - Group By Operator [GBY_11] (rows=1 width=280) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) - Select Operator [SEL_5] (rows=1 width=272) - Output:["_col0","_col1"] - Group By Operator [GBY_4] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_3] - Group By Operator [GBY_2] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_1] (rows=500 width=10) - TableScan [TS_0] (rows=500 width=10) - default@src,s1,Tbl:COMPLETE,Col:COMPLETE - Reduce Output Operator [RS_19] - PartitionCols:_col0, _col1 - Group By Operator [GBY_18] (rows=1 width=464) - Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) - Please refer to the previous Select Operator [SEL_5] Reducer 5 llap - File Output Operator [FS_23] - table:{"name:":"default.dest2"} - Select Operator [SEL_21] (rows=1 width=456) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_20] (rows=1 width=280) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT KEY._col2:0._col0)"],keys:KEY._col0, KEY._col1 - <- Please refer to the previous Union 3 [SIMPLE_EDGE] + File Output Operator [FS_6] + Group By Operator 
[GBY_4] (rows=1 width=880) + Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] llap + File Output Operator [FS_16] + table:{"name:":"default.dest1"} + Select Operator [SEL_14] (rows=1 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_13] (rows=1 width=96) + Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 + <-Union 3 [SIMPLE_EDGE] + <-Map 8 [CONTAINS] llap + Reduce Output Operator [RS_12] + PartitionCols:_col0 + Group By Operator [GBY_11] (rows=1 width=280) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) + Select Operator [SEL_7] (rows=500 width=10) + Output:["_col0","_col1"] + TableScan [TS_6] (rows=500 width=10) + Output:["key","value"] + Reduce Output Operator [RS_19] + PartitionCols:_col0, _col1 + Group By Operator [GBY_18] (rows=1 width=464) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) + Please refer to the previous Select Operator [SEL_7] + <-Reducer 2 [CONTAINS] llap + Reduce Output Operator [RS_12] + PartitionCols:_col0 + Group By Operator [GBY_11] (rows=1 width=280) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) + Select Operator [SEL_5] (rows=1 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_4] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_3] + Group By Operator [GBY_2] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Select Operator [SEL_1] (rows=500 width=10) + TableScan [TS_0] (rows=500 width=10) + default@src,s1,Tbl:COMPLETE,Col:COMPLETE + Reduce Output Operator [RS_19] + PartitionCols:_col0, _col1 + Group By Operator [GBY_18] (rows=1 width=464) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) + Please refer to the previous Select Operator [SEL_5] + PARTITION_ONLY_SHUFFLE [RS_3] + Group By Operator [GBY_2] (rows=1 width=880) + Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] + Select Operator [SEL_1] (rows=1 width=272) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_14] + Reducer 7 llap + File Output Operator [FS_6] + Group By Operator [GBY_4] (rows=1 width=1320) + Output:["_col0","_col1","_col2"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)","compute_stats(VALUE._col2)"] + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] llap + File Output Operator [FS_23] + table:{"name:":"default.dest2"} + Select Operator [SEL_21] (rows=1 width=456) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_20] (rows=1 width=280) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT KEY._col2:0._col0)"],keys:KEY._col0, KEY._col1 + <- Please refer to the previous Union 3 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_3] + Group By Operator [GBY_2] (rows=1 width=1320) + Output:["_col0","_col1","_col2"],aggregations:["compute_stats(key, 'hll')","compute_stats(val1, 'hll')","compute_stats(val2, 'hll')"] + Select Operator [SEL_1] (rows=1 width=456) + Output:["key","val1","val2"] + Please refer to the previous Select Operator [SEL_21] Stage-5 Stats Work{} Stage-1 diff --git ql/src/test/results/clientpositive/llap/extrapolate_part_stats_partial_ndv.q.out 
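Every plan above ends the same way once column autogather is on: each written table gains a Select Operator over the inserted columns, a partial Group By running compute_stats(col, 'hll'), a CUSTOM_SIMPLE_EDGE shuffle, and a single reducer that merges the partials for the Stats Work stage. A minimal hand-written sketch of that aggregation, run against the src test table these plans scan (all names taken from the plans above, nothing new assumed):

-- The same UDAF the new stats reducers evaluate; 'hll' selects the
-- HyperLogLog NDV estimator named in the plans above. The result is a
-- single row of stats structs, matching the struct value expressions
-- shuffled between the partial and merging Group By Operators.
SELECT compute_stats(key, 'hll'), compute_stats(value, 'hll')
FROM src;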
ql/src/test/results/clientpositive/llap/extrapolate_part_stats_partial_ndv.q.out index 1ac5977898..aa44d33420 100644 --- ql/src/test/results/clientpositive/llap/extrapolate_part_stats_partial_ndv.q.out +++ ql/src/test/results/clientpositive/llap/extrapolate_part_stats_partial_ndv.q.out @@ -269,7 +269,7 @@ STAGE PLANS: partition values: year 2000 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,cnt,zip @@ -404,7 +404,7 @@ STAGE PLANS: partition values: year 2003 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,cnt,zip @@ -1051,7 +1051,7 @@ STAGE PLANS: year 2001 zip 43201 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,cnt @@ -1097,7 +1097,7 @@ STAGE PLANS: year 2002 zip 43201 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,cnt @@ -1143,7 +1143,7 @@ STAGE PLANS: year 2003 zip 43201 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,cnt @@ -1189,7 +1189,7 @@ STAGE PLANS: year 2000 zip 94086 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,cnt @@ -1281,7 +1281,7 @@ STAGE PLANS: year 2002 zip 94086 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,cnt @@ -1327,7 +1327,7 @@ STAGE PLANS: year 2003 zip 94086 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,cnt @@ -1373,7 +1373,7 @@ STAGE PLANS: year 2000 zip 94087 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,cnt @@ -1419,7 +1419,7 @@ STAGE PLANS: year 2001 zip 94087 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,cnt @@ -1511,7 +1511,7 @@ STAGE PLANS: year 2003 zip 94087 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,cnt diff --git ql/src/test/results/clientpositive/llap/filter_join_breaktask.q.out 
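The extrapolate_part_stats_partial_ndv hunks above change metadata only: COLUMN_STATS_ACCURATE on each partition now carries a COLUMN_STATS map marking which columns have accurate stats, rather than basic stats alone. A sketch of inspecting that marker; the table name loc_part is hypothetical (this excerpt never names the table), and the partition spec matches the year 2000 partition above:

-- loc_part is a hypothetical name; partition values from the diff above.
DESCRIBE FORMATTED loc_part PARTITION (year='2000');
-- Partition Parameters should now include something like:
--   COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","locid":"true","state":"true","zip":"true"}}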
ql/src/test/results/clientpositive/llap/filter_join_breaktask.q.out index ecc2246b81..f69d8a2ae0 100644 --- ql/src/test/results/clientpositive/llap/filter_join_breaktask.q.out +++ ql/src/test/results/clientpositive/llap/filter_join_breaktask.q.out @@ -44,23 +44,23 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: g - Statistics: Num rows: 25 Data size: 4600 Basic stats: COMPLETE Column stats: NONE + alias: f + Statistics: Num rows: 25 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false - predicate: (value <> '') (type: boolean) - Statistics: Num rows: 25 Data size: 4600 Basic stats: COMPLETE Column stats: NONE + predicate: key is not null (type: boolean) + Statistics: Num rows: 15 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: value (type: string) + expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 4600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 1450 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: _col0 (type: int) null sort order: a sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 4600 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 15 Data size: 1450 Basic stats: COMPLETE Column stats: COMPLETE tag: 0 auto parallelism: true Execution mode: llap @@ -76,7 +76,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -116,29 +116,29 @@ STAGE PLANS: name: default.filter_join_breaktask name: default.filter_join_breaktask Truncated Path -> Alias: - /filter_join_breaktask/ds=2008-04-08 [g] + /filter_join_breaktask/ds=2008-04-08 [f] Map 4 Map Operator Tree: TableScan alias: m - Statistics: Num rows: 25 Data size: 4700 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 2289 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false predicate: ((value <> '') and key is not null) (type: boolean) - Statistics: Num rows: 24 Data size: 4512 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 1375 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 24 Data size: 4512 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 2785 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: int) null sort order: a sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 24 Data size: 4512 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 15 Data size: 2785 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 - value expressions: _col0 (type: int) + value expressions: _col1 (type: string) auto parallelism: true Execution mode: llap LLAP IO: no inputs @@ -153,7 +153,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -197,23 +197,23 @@ STAGE PLANS: Map 5 Map Operator Tree: TableScan - alias: f - Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: NONE + alias: g + Statistics: Num rows: 25 Data size: 2225 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + predicate: (value <> '') (type: boolean) + Statistics: Num rows: 25 Data size: 2225 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: int) + expressions: value (type: string) outputColumnNames: _col0 - Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 4575 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col0 (type: string) null sort order: a sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 4575 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 auto parallelism: true Execution mode: llap @@ -229,7 +229,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -269,7 +269,7 @@ STAGE PLANS: name: default.filter_join_breaktask name: default.filter_join_breaktask Truncated Path -> Alias: - /filter_join_breaktask/ds=2008-04-08 [f] + /filter_join_breaktask/ds=2008-04-08 [g] Reducer 2 Execution mode: llap Needs Tagging: false @@ -278,19 +278,19 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col2 - Position of Big Table: 0 - Statistics: Num rows: 27 Data size: 5060 Basic stats: COMPLETE Column stats: NONE + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col3 + Position of Big Table: 1 + Statistics: Num rows: 25 Data size: 2305 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col2 (type: int) + key expressions: _col3 (type: string) null sort order: a sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 27 Data size: 5060 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col3 (type: string) + Statistics: Num rows: 25 Data size: 2305 Basic stats: COMPLETE Column stats: COMPLETE tag: 0 - value expressions: _col0 (type: string) + value expressions: _col0 (type: int) auto parallelism: true Reducer 3 Execution mode: llap @@ -300,21 +300,21 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) - 1 _col0 (type: int) + 0 _col3 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0, _col5 Position of Big Table: 0 - Statistics: Num rows: 29 Data size: 5566 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 32 Data size: 2956 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col5 (type: int), _col0 (type: string) + expressions: _col0 (type: int), _col5 (type: string) outputColumnNames: _col0, _col1 - 
Statistics: Num rows: 29 Data size: 5566 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 32 Data size: 2956 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 29 Data size: 5566 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 32 Data size: 2956 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/groupby2.q.out ql/src/test/results/clientpositive/llap/groupby2.q.out index 5ab62358ed..c45271997e 100644 --- ql/src/test/results/clientpositive/llap/groupby2.q.out +++ ql/src/test/results/clientpositive/llap/groupby2.q.out @@ -27,6 +27,8 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -81,6 +83,42 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + outputColumnNames: key, c1, c2 + Statistics: Num rows: 309 Data size: 114948 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 309 Data size: 114948 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: key (type: string), c1 (type: int), c2 (type: string) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') + mode: partial1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -98,6 +136,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, c1, c2 + Column Types: string, int, string + Table: default.dest_g2 PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) diff --git ql/src/test/results/clientpositive/llap/groupby3.q.out ql/src/test/results/clientpositive/llap/groupby3.q.out index 897946b4ba..d050c4ec69 100644 --- 
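The groupby2.q.out hunks above show the case where the INSERT already ends in a reducer: the written rows are re-shuffled on rand() (spreading the compute_stats partials evenly across a new vertex), aggregated in mode partial1, then merged in mode final on a single reducer, and the Stats Work stage picks up the Column Stats Desc for key, c1, c2. A sketch of reading the gathered stats back afterwards; dest_g2 and its columns come from the plan above, and column-level DESCRIBE reads the merged stats from the metastore:

-- Check the column stats that autogather wrote for dest_g2.key.
DESCRIBE FORMATTED dest_g2 key;
-- Expected output includes min/max, num_nulls and distinct_count.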
ql/src/test/results/clientpositive/llap/groupby3.q.out +++ ql/src/test/results/clientpositive/llap/groupby3.q.out @@ -45,6 +45,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -73,6 +74,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 1208 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 1 Data size: 1208 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: double), _col1 (type: struct), _col2 (type: struct), _col3 (type: string), _col4 (type: string), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) Reducer 3 @@ -95,6 +97,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll') + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 3816 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 3816 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6), compute_stats(VALUE._col7), compute_stats(VALUE._col8) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 3960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 3960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -112,6 +142,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Column Types: double, double, double, double, double, double, double, double, double + Table: default.dest1 PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT diff --git ql/src/test/results/clientpositive/llap/infer_bucket_sort_bucketed_table.q.out 
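In the groupby3.q.out hunks above, the nine double aggregates written to dest1 get compute_stats in complete mode on the writer's own reducer plus a final merge vertex, and Stats Work lists all nine columns and their types. For tables populated before this change (or with autogather disabled), the same stats can still be collected by hand — a sketch using the long-standing manual path:

-- Manual equivalent of what the appended stats reducers now do for dest1.
ANALYZE TABLE dest1 COMPUTE STATISTICS FOR COLUMNS;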
ql/src/test/results/clientpositive/llap/infer_bucket_sort_bucketed_table.q.out index 876c858f5d..1520232a10 100644 --- ql/src/test/results/clientpositive/llap/infer_bucket_sort_bucketed_table.q.out +++ ql/src/test/results/clientpositive/llap/infer_bucket_sort_bucketed_table.q.out @@ -40,7 +40,7 @@ Database: default Table: test_table_bucketed #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 3 numRows 309 rawDataSize 1482 diff --git ql/src/test/results/clientpositive/llap/insert1.q.out ql/src/test/results/clientpositive/llap/insert1.q.out index bc626824f0..315c660a57 100644 --- ql/src/test/results/clientpositive/llap/insert1.q.out +++ ql/src/test/results/clientpositive/llap/insert1.q.out @@ -38,6 +38,9 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -59,8 +62,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1052 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1052 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1068 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1068 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -78,6 +109,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert1 PREHOOK: query: explain insert into table INSERT1 select a.key, a.value from insert2 a WHERE (a.key=-1) PREHOOK: type: QUERY @@ -93,6 +128,9 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -114,8 +152,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + 
outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1052 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1052 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1068 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1068 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -133,6 +199,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert1 PREHOOK: query: create database x PREHOOK: type: CREATEDATABASE @@ -162,6 +232,9 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -183,8 +256,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: x.insert1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1052 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1052 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1068 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1068 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -202,6 +303,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: x.insert1 PREHOOK: query: explain insert into table default.INSERT1 select a.key, a.value from insert2 a WHERE (a.key=-1) PREHOOK: type: QUERY @@ -217,6 +322,9 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -238,8 +346,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1052 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1052 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1068 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1068 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -257,6 +393,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert1 PREHOOK: query: explain from insert2 @@ -280,6 +420,10 @@ STAGE PLANS: Stage: Stage-2 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -301,6 +445,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1052 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1052 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key < 20) and (key > 10)) (type: boolean) Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE @@ -316,8 +473,51 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: x.insert1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1052 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1052 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution 
mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1068 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1068 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1068 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1068 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -335,6 +535,10 @@ STAGE PLANS: Stage: Stage-4 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert1 Stage: Stage-1 Move Operator @@ -349,6 +553,10 @@ STAGE PLANS: Stage: Stage-5 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: x.insert1 PREHOOK: query: CREATE DATABASE db2 PREHOOK: type: CREATEDATABASE diff --git ql/src/test/results/clientpositive/llap/insert_into1.q.out ql/src/test/results/clientpositive/llap/insert_into1.q.out index 61297f04c8..0506404441 100644 --- ql/src/test/results/clientpositive/llap/insert_into1.q.out +++ ql/src/test/results/clientpositive/llap/insert_into1.q.out @@ -26,6 +26,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -41,7 +42,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -67,6 +67,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 100 Data size: 9500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 
1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -84,6 +112,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into1 PREHOOK: query: INSERT INTO TABLE insert_into1 SELECT * from src ORDER BY key LIMIT 100 PREHOOK: type: QUERY @@ -149,6 +181,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -164,7 +197,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -190,6 +222,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 100 Data size: 9500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -207,6 +267,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into1 PREHOOK: query: INSERT INTO TABLE insert_into1 SELECT * FROM src ORDER BY key LIMIT 100 PREHOOK: type: QUERY @@ -272,6 +336,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -287,7 +352,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -313,6 +377,34 @@ STAGE PLANS: output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -330,6 +422,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into1 PREHOOK: query: INSERT OVERWRITE TABLE insert_into1 SELECT * FROM src ORDER BY key LIMIT 10 PREHOOK: type: QUERY @@ -393,6 +489,9 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -412,8 +511,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -431,6 +558,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column 
Types: int, string + Table: default.insert_into1 PREHOOK: query: insert overwrite table insert_into1 select 1, 'a' PREHOOK: type: QUERY @@ -456,6 +587,9 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -475,8 +609,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -494,6 +656,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into1 PREHOOK: query: insert into insert_into1 select 2, 'b' PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/llap/insert_into2.q.out ql/src/test/results/clientpositive/llap/insert_into2.q.out index ef2b875efb..83f0e3dedc 100644 --- ql/src/test/results/clientpositive/llap/insert_into2.q.out +++ ql/src/test/results/clientpositive/llap/insert_into2.q.out @@ -30,6 +30,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -45,7 +46,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -71,6 +71,42 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 100 Data size: 18000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + Reduce 
Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -90,6 +126,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into2 PREHOOK: query: INSERT INTO TABLE insert_into2 PARTITION (ds='1') SELECT * FROM src order by key limit 100 PREHOOK: type: QUERY @@ -194,6 +234,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -209,7 +250,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -235,6 +275,42 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '2' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 100 Data size: 18000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + 
Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -254,6 +330,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into2 PREHOOK: query: INSERT OVERWRITE TABLE insert_into2 PARTITION (ds='2') SELECT * FROM src order by key LIMIT 100 @@ -327,6 +407,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -342,7 +423,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -368,6 +448,42 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '2' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 50 Data size: 9000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -387,6 +503,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into2 PREHOOK: query: INSERT OVERWRITE TABLE insert_into2 PARTITION (ds='2') SELECT * FROM src order by key LIMIT 50 diff --git ql/src/test/results/clientpositive/llap/intersect_all.q.out ql/src/test/results/clientpositive/llap/intersect_all.q.out index 69be5e39b5..1aef092712 100644 --- ql/src/test/results/clientpositive/llap/intersect_all.q.out +++ 
ql/src/test/results/clientpositive/llap/intersect_all.q.out @@ -1551,42 +1551,42 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: value (type: int) outputColumnNames: value - Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: value (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 5 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: key - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: key (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1596,24 +1596,24 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: int) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(_col1), count(_col1) keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 4 Execution mode: llap @@ -1623,24 +1623,24 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + 
Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (_col2 = 2) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: bigint), _col0 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE UDTF Operator - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE function name: UDTFReplicateRows Select Operator expressions: col1 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1652,24 +1652,24 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: int) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(_col1), count(_col1) keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) Union 3 Vertex: Union 3 diff --git ql/src/test/results/clientpositive/llap/intersect_distinct.q.out ql/src/test/results/clientpositive/llap/intersect_distinct.q.out index 5c4a4d6ff2..82d154e39c 100644 --- ql/src/test/results/clientpositive/llap/intersect_distinct.q.out +++ ql/src/test/results/clientpositive/llap/intersect_distinct.q.out @@ -1153,42 +1153,42 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: value (type: int) outputColumnNames: value - Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: value (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 
2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 5 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: key - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: key (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1198,24 +1198,24 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: int) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col1) keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 4 Execution mode: llap @@ -1225,17 +1225,17 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (_col1 = 2) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE table: input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1247,24 +1247,24 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: int) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col1) keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Union 3 Vertex: Union 3 diff --git ql/src/test/results/clientpositive/llap/intersect_merge.q.out ql/src/test/results/clientpositive/llap/intersect_merge.q.out index ae372cbbf7..063a0ae5e0 100644 --- ql/src/test/results/clientpositive/llap/intersect_merge.q.out +++ ql/src/test/results/clientpositive/llap/intersect_merge.q.out @@ -55,22 +55,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: key, value - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: key (type: int), value (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -78,22 +78,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: key, value - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: key (type: int), value (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic 
stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -101,22 +101,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: key, value - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: key (type: int), value (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -124,22 +124,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: key, value - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: key (type: int), value (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -147,22 +147,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: key, value - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: key (type: int), value (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: 
NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -174,18 +174,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reducer 12 Execution mode: llap @@ -195,18 +195,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reducer 2 Execution mode: llap @@ -216,18 +216,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reducer 4 Execution mode: llap @@ -237,17 +237,17 @@ STAGE 
PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (_col2 = 5) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -260,18 +260,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reducer 8 Execution mode: llap @@ -281,18 +281,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Union 3 Vertex: Union 3 @@ -327,22 +327,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: key, value - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num 
rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: key (type: int), value (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -350,22 +350,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: key, value - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: key (type: int), value (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -373,22 +373,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: key, value - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: key (type: int), value (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -396,22 +396,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: key, value - Statistics: Num rows: 4 Data size: 32 Basic 
stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: key (type: int), value (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -423,18 +423,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reducer 2 Execution mode: llap @@ -444,18 +444,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reducer 4 Execution mode: llap @@ -465,17 +465,17 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (_col2 = 4) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + 
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -488,18 +488,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reducer 8 Execution mode: llap @@ -509,18 +509,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Union 3 Vertex: Union 3 @@ -556,22 +556,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: key, value - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: key (type: int), value (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: 
COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -579,22 +579,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: key, value - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: key (type: int), value (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -602,22 +602,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: key, value - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: key (type: int), value (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -625,22 +625,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: key, value - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: key (type: int), value (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 
Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -648,22 +648,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: key, value - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: key (type: int), value (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -675,18 +675,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reducer 12 Execution mode: llap @@ -696,18 +696,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reducer 2 Execution mode: llap @@ -717,18 +717,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 8 Basic 
stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count(_col2)
keys: _col0 (type: int), _col1 (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: int)
sort order: ++
Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
- Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: bigint)
Reducer 4
Execution mode: llap
@@ -738,17 +738,17 @@ STAGE PLANS:
keys: KEY._col0 (type: int), KEY._col1 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (_col2 = 5) (type: boolean)
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: int), _col1 (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -761,18 +761,18 @@ STAGE PLANS:
keys: KEY._col0 (type: int), KEY._col1 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count(_col2)
keys: _col0 (type: int), _col1 (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: int)
sort order: ++
Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
- Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: bigint)
Reducer 8
Execution mode: llap
@@ -782,18 +782,18 @@ STAGE PLANS:
keys: KEY._col0 (type: int), KEY._col1 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count(_col2)
keys: _col0 (type: int), _col1 (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: int)
sort order: ++
Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
- Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: bigint)
Union 3
Vertex: Union 3
@@ -829,22 +829,22 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: b
- Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: int)
outputColumnNames: key, value
- Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
keys: key (type: int), value (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: int)
sort order: ++
Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
- Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: bigint)
Execution mode: llap
LLAP IO: no inputs
@@ -852,22 +852,22 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: b
- Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: int)
outputColumnNames: key, value
- Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
keys: key (type: int), value (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: int)
sort order: ++
Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
- Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: bigint)
Execution mode: llap
LLAP IO: no inputs
@@ -875,22 +875,22 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: a
- Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: int)
outputColumnNames: key, value
- Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
keys: key (type: int), value (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: int)
sort order: ++
Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
- Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: bigint)
Execution mode: llap
LLAP IO: no inputs
@@ -898,22 +898,22 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: b
- Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: int)
outputColumnNames: key, value
- Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
keys: key (type: int), value (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: int)
sort order: ++
Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
- Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: bigint)
Execution mode: llap
LLAP IO: no inputs
@@ -921,22 +921,22 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: a
- Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: int)
outputColumnNames: key, value
- Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
keys: key (type: int), value (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: int)
sort order: ++
Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
- Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: bigint)
Execution mode: llap
LLAP IO: no inputs
@@ -948,18 +948,18 @@ STAGE PLANS:
keys: KEY._col0 (type: int), KEY._col1 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count(_col2)
keys: _col0 (type: int), _col1 (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: int)
sort order: ++
Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
- Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: bigint)
Reducer 12
Execution mode: llap
@@ -969,18 +969,18 @@ STAGE PLANS:
keys: KEY._col0 (type: int), KEY._col1 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count(_col2)
keys: _col0 (type: int), _col1 (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: int)
sort order: ++
Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
- Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: bigint)
Reducer 2
Execution mode: llap
@@ -990,18 +990,18 @@ STAGE PLANS:
keys: KEY._col0 (type: int), KEY._col1 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count(_col2)
keys: _col0 (type: int), _col1 (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: int)
sort order: ++
Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
- Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: bigint)
Reducer 4
Execution mode: llap
@@ -1011,17 +1011,17 @@ STAGE PLANS:
keys: KEY._col0 (type: int), KEY._col1 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (_col2 = 5) (type: boolean)
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: int), _col1 (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1034,18 +1034,18 @@ STAGE PLANS:
keys: KEY._col0 (type: int), KEY._col1 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count(_col2)
keys: _col0 (type: int), _col1 (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: int)
sort order: ++
Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
- Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: bigint)
Reducer 8
Execution mode: llap
@@ -1055,18 +1055,18 @@ STAGE PLANS:
keys: KEY._col0 (type: int), KEY._col1 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count(_col2)
keys: _col0 (type: int), _col1 (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: int)
sort order: ++
Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
- Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: bigint)
Union 3
Vertex: Union 3
@@ -1102,22 +1102,22 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: b
- Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: int)
outputColumnNames: key, value
- Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
keys: key (type: int), value (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: int)
sort order: ++
Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
- Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: bigint)
Execution mode: llap
LLAP IO: no inputs
@@ -1125,22 +1125,22 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: b
- Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: int)
outputColumnNames: key, value
- Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
keys: key (type: int), value (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: int)
sort order: ++
Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
- Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: bigint)
Execution mode: llap
LLAP IO: no inputs
@@ -1148,22 +1148,22 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: a
- Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: int)
outputColumnNames: key, value
- Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
keys: key (type: int), value (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: int)
sort order: ++
Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
- Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: bigint)
Execution mode: llap
LLAP IO: no inputs
@@ -1171,22 +1171,22 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: b
- Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: int)
outputColumnNames: key, value
- Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
keys: key (type: int), value (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: int)
sort order: ++
Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
- Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: bigint)
Execution mode: llap
LLAP IO: no inputs
@@ -1194,22 +1194,22 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: a
- Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: int)
outputColumnNames: key, value
- Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
keys: key (type: int), value (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: int)
sort order: ++
Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
- Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: bigint)
Execution mode: llap
LLAP IO: no inputs
@@ -1221,18 +1221,18 @@ STAGE PLANS:
keys: KEY._col0 (type: int), KEY._col1 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count(_col2)
keys: _col0 (type: int), _col1 (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: int)
sort order: ++
Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
- Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: bigint)
Reducer 12
Execution mode: llap
@@ -1242,18 +1242,18 @@ STAGE PLANS:
keys: KEY._col0 (type: int), KEY._col1 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count(_col2)
keys: _col0 (type: int), _col1 (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: int)
sort order: ++
Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
- Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: bigint)
Reducer 2
Execution mode: llap
@@ -1263,18 +1263,18 @@ STAGE PLANS:
keys: KEY._col0 (type: int), KEY._col1 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count(_col2)
keys: _col0 (type: int), _col1 (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: int)
sort order: ++
Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
- Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: bigint)
Reducer 4
Execution mode: llap
@@ -1284,17 +1284,17 @@ STAGE PLANS:
keys: KEY._col0 (type: int), KEY._col1 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (_col2 = 5) (type: boolean)
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: int), _col1 (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1307,18 +1307,18 @@ STAGE PLANS:
keys: KEY._col0 (type: int), KEY._col1 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count(_col2)
keys: _col0 (type: int), _col1 (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: int)
sort order: ++
Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
- Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: bigint)
Reducer 8
Execution mode: llap
@@ -1328,18 +1328,18 @@ STAGE PLANS:
keys: KEY._col0 (type: int), KEY._col1 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count(_col2)
keys: _col0 (type: int), _col1 (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: int)
sort order: ++
Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
- Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: bigint)
Union 3
Vertex: Union 3
@@ -1373,22 +1373,22 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: b
- Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: int)
outputColumnNames: key, value
- Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
keys: key (type: int), value (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: int)
sort order: ++
Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
- Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: bigint)
Execution mode: llap
LLAP IO: no inputs
@@ -1396,22 +1396,22 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: a
- Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: int)
outputColumnNames: key, value
- Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
keys: key (type: int), value (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: int)
sort order: ++
Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
- Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: bigint)
Execution mode: llap
LLAP IO: no inputs
@@ -1419,22 +1419,22 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: b
- Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: int)
outputColumnNames: key, value
- Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
keys: key (type: int), value (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: int)
sort order: ++
Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
- Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: bigint)
Execution mode: llap
LLAP IO: no inputs
@@ -1446,18 +1446,18 @@ STAGE PLANS:
keys: KEY._col0 (type: int), KEY._col1 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count(_col2)
keys: _col0 (type: int), _col1 (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: int)
sort order: ++
Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
- Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: bigint)
Reducer 4
Execution mode: llap
@@ -1467,17 +1467,17 @@ STAGE PLANS:
keys: KEY._col0 (type: int), KEY._col1 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (_col2 = 3) (type: boolean)
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: int), _col1 (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1490,18 +1490,18 @@ STAGE PLANS:
keys: KEY._col0 (type: int), KEY._col1 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count(_col2)
keys: _col0 (type: int), _col1 (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: int)
sort order: ++
Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
- Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: bigint)
Reducer 8
Execution mode: llap
@@ -1511,18 +1511,18 @@ STAGE PLANS:
keys: KEY._col0 (type: int), KEY._col1 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count(_col2)
keys: _col0 (type: int), _col1 (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: int)
sort order: ++
Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
- Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: bigint)
Union 3
Vertex: Union 3
@@ -1556,22 +1556,22 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: b
- Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: int)
outputColumnNames: key, value
- Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
keys: key (type: int), value (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: int)
sort order: ++
Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
- Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: bigint)
Execution mode: llap
LLAP IO: no inputs
@@ -1579,22 +1579,22 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: a
- Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: int)
outputColumnNames: key, value
- Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
keys: key (type: int), value (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: int)
sort order: ++
Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
- Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: bigint)
Execution mode: llap
LLAP IO: no inputs
@@ -1602,22 +1602,22 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: b
- Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: int)
outputColumnNames: key, value
- Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
keys: key (type: int), value (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: int)
sort order: ++
Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
- Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: bigint)
Execution mode: llap
LLAP IO: no inputs
@@ -1629,18 +1629,18 @@ STAGE PLANS:
keys: KEY._col0 (type: int), KEY._col1 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: min(_col2), count(_col2)
keys: _col0 (type: int), _col1 (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: int)
sort order: ++
Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
- Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: bigint), _col3 (type: bigint)
Reducer 4
Execution mode: llap
@@ -1650,24 +1650,24 @@ STAGE PLANS:
keys: KEY._col0 (type: int), KEY._col1 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (_col3 = 3) (type: boolean)
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col2 (type: bigint), _col0 (type: int), _col1 (type: int)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
UDTF Operator
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
function name: UDTFReplicateRows
Select Operator
expressions: col1 (type: int), col2 (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1680,18 +1680,18 @@ STAGE PLANS:
keys: KEY._col0 (type: int), KEY._col1 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: min(_col2), count(_col2)
keys: _col0 (type: int), _col1 (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: int)
sort order: ++
Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
- Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: bigint), _col3 (type: bigint)
Reducer 8
Execution mode: llap
@@ -1701,18 +1701,18 @@ STAGE PLANS:
keys: KEY._col0 (type: int), KEY._col1 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: min(_col2), count(_col2)
keys: _col0 (type: int), _col1 (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: int)
sort order: ++
Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
- Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: bigint), _col3 (type: bigint)
Union 3
Vertex: Union 3
@@ -1747,28 +1747,28 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: b
- Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: int)
outputColumnNames: key, value
- Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
keys: key (type: int), value (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: int)
sort order: ++
Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
- Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: bigint)
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: int)
sort order: ++
Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
- Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: bigint)
Execution mode: llap
LLAP IO: no inputs
@@ -1776,22 +1776,22 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: a
- Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: int)
outputColumnNames: key, value
- Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
keys: key (type: int), value (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: int)
sort order: ++
Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
- Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: bigint)
Execution mode: llap
LLAP IO: no inputs
@@ -1803,18 +1803,18 @@ STAGE PLANS:
keys: KEY._col0 (type: int), KEY._col1 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: min(_col2), count(_col2)
keys: _col0 (type: int), _col1 (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: int)
sort order: ++
Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
- Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: bigint), _col3 (type: bigint)
Reducer 4
Execution mode: llap
@@ -1824,24 +1824,24 @@ STAGE PLANS:
keys: KEY._col0 (type: int), KEY._col1 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (_col3 = 2) (type: boolean)
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col2 (type: bigint), _col0 (type: int), _col1 (type: int)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
UDTF Operator
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
function name: UDTFReplicateRows
Select Operator
expressions: col1 (type: int), col2 (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1854,18 +1854,18 @@ STAGE PLANS:
keys: KEY._col0 (type: int), KEY._col1 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count(_col2)
keys: _col0 (type: int), _col1 (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: int)
sort order: ++
Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
- Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: bigint)
Reducer 7
Execution mode: llap
@@ -1875,31 +1875,31 @@ STAGE PLANS:
keys: KEY._col0 (type: int), KEY._col1 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (_col2 = 2) (type: boolean)
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: int), _col1 (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
keys: _col0 (type: int), _col1 (type: int)
mode: complete
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: min(_col2), count(_col2)
keys: _col0 (type: int), _col1 (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: int)
sort order: ++
Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
- Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: bigint), _col3 (type: bigint)
Reducer 9
Execution mode: llap
@@ -1909,18 +1909,18 @@ STAGE PLANS:
keys: KEY._col0 (type: int), KEY._col1 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count(_col2)
keys: _col0 (type: int), _col1 (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Union 3 Vertex: Union 3 diff --git ql/src/test/results/clientpositive/llap/join1.q.out ql/src/test/results/clientpositive/llap/join1.q.out index 661f55caaf..3c0a155cd8 100644 --- ql/src/test/results/clientpositive/llap/join1.q.out +++ ql/src/test/results/clientpositive/llap/join1.q.out @@ -25,7 +25,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -47,7 +48,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan alias: src2 @@ -90,6 +91,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 809 Data size: 76855 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -107,6 +136,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest_j1 PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value diff --git ql/src/test/results/clientpositive/llap/join32_lessSize.q.out ql/src/test/results/clientpositive/llap/join32_lessSize.q.out index 140f87ea42..5b5be13702 100644 --- ql/src/test/results/clientpositive/llap/join32_lessSize.q.out +++ ql/src/test/results/clientpositive/llap/join32_lessSize.q.out @@ -37,8 +37,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- 
Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Map 1 <- Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -58,13 +59,13 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Estimated key counts: Map 3 => 25 + Estimated key counts: Map 4 => 25 keys: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col1, _col2, _col3 input vertices: - 1 Map 3 + 1 Map 4 Position of Big Table: 0 Statistics: Num rows: 40 Data size: 10640 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator @@ -129,7 +130,7 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [y] - Map 3 + Map 4 Map Operator Tree: TableScan alias: x @@ -205,7 +206,7 @@ STAGE PLANS: name: default.src1 Truncated Path -> Alias: /src1 [x] - Map 4 + Map 5 Map Operator Tree: TableScan alias: z @@ -331,6 +332,53 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 64 Data size: 17152 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -367,6 +415,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j1 + Is Table Level Stats: true PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT x.key, z.value, y.value @@ -509,9 +562,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Map 1 <- Map 5 (BROADCAST_EDGE) + Reducer 2 <- 
Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) + Reducer 3 <- Map 7 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -531,13 +585,13 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Estimated key counts: Map 4 => 25 + Estimated key counts: Map 5 => 25 keys: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col3 input vertices: - 1 Map 4 + 1 Map 5 Position of Big Table: 0 Statistics: Num rows: 39 Data size: 10296 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator @@ -602,7 +656,7 @@ STAGE PLANS: name: default.src1 Truncated Path -> Alias: /src1 [x] - Map 4 + Map 5 Map Operator Tree: TableScan alias: z @@ -678,7 +732,7 @@ STAGE PLANS: name: default.src1 Truncated Path -> Alias: /src1 [z] - Map 5 + Map 6 Map Operator Tree: TableScan alias: w @@ -753,7 +807,7 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [w] - Map 6 + Map 7 Map Operator Tree: TableScan alias: y @@ -879,7 +933,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value,val2 @@ -900,6 +954,53 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 101 Data size: 26866 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 4 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -913,7 +1014,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat 
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value,val2 @@ -936,6 +1037,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j1 + Is Table Level Stats: true PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT x.key, z.value, y.value @@ -1074,8 +1180,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 3 <- Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Map 4 <- Map 5 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1155,7 +1262,7 @@ STAGE PLANS: name: default.srcpart Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [z] - Map 3 + Map 4 Map Operator Tree: TableScan alias: y @@ -1172,13 +1279,13 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Estimated key counts: Map 4 => 25 + Estimated key counts: Map 5 => 25 keys: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col1, _col2 input vertices: - 1 Map 4 + 1 Map 5 Position of Big Table: 0 Statistics: Num rows: 40 Data size: 7000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -1247,7 +1354,7 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [y] - Map 4 + Map 5 Map Operator Tree: TableScan alias: x @@ -1372,6 +1479,53 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 64 Data size: 17024 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -1408,6 +1562,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j2 + Is Table Level Stats: true PREHOOK: query: INSERT OVERWRITE TABLE dest_j2 SELECT res.key, z.value, res.value @@ -1548,8 +1707,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1629,7 +1789,7 @@ STAGE PLANS: name: default.srcpart Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [z] - Map 3 + Map 4 Map Operator Tree: TableScan alias: y @@ -1700,7 +1860,7 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [y] - Map 5 + Map 6 Map Operator Tree: TableScan alias: x @@ -1804,7 +1964,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value,val2 @@ -1825,7 +1985,54 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false - Reducer 4 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 64 Data size: 17024 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Reducer 5 Execution mode: llap Needs Tagging: false Reduce Operator Tree: @@ -1864,7 +2071,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value,val2 @@ -1887,6 +2094,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j2 + Is Table Level Stats: true PREHOOK: query: INSERT OVERWRITE TABLE dest_j2 SELECT res.key, z.value, res.value @@ -2039,8 +2251,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 3 <- Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Map 4 <- Map 5 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2062,7 +2275,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 184500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan alias: y @@ -2082,7 +2295,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col1, _col2 input vertices: - 1 Map 4 + 1 Map 5 Statistics: Num rows: 40 Data size: 7000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string), _col2 (type: string) @@ -2096,7 +2309,7 @@ STAGE PLANS: value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: x @@ -2139,6 +2352,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 64 Data size: 17024 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -2156,6 +2397,10 @@ STAGE
PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j2 PREHOOK: query: INSERT OVERWRITE TABLE dest_j2 SELECT res.key, x.value, res.value
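Note on the pattern above: every INSERT OVERWRITE plan in this file now carries the column-stats autogather pipeline: a Select Operator over the inserted rows feeding a map-side Group By that runs compute_stats(col, 'hll') in hash mode, a single reducer that merges the partial HLL sketches (mode: mergepartial), and a Column Stats Desc under Stats Work naming the columns to persist. Roughly, the inlined stage is equivalent to manually analyzing the target table after the insert; the statement below is an illustration, not part of the patch:

-- Hand-run equivalent of the autogathered stage (sketch): gathers
-- per-column NDV (via HLL), null counts, min/max and lengths, and
-- persists them, which is what extends COLUMN_STATS_ACCURATE with the
-- COLUMN_STATS entries visible in the table properties above.
ANALYZE TABLE default.dest_j2 COMPUTE STATISTICS FOR COLUMNS;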
@@ -2296,8 +2541,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 3 <- Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Map 4 <- Map 5 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2319,7 +2565,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 184500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan alias: y @@ -2339,7 +2585,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col1, _col2 input vertices: - 1 Map 4 + 1 Map 5 Statistics: Num rows: 40 Data size: 7000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string), _col2 (type: string) @@ -2353,7 +2599,7 @@ STAGE PLANS: value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: x @@ -2396,6 +2642,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 64 Data size: 17024 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -2413,6 +2687,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j2 PREHOOK: query: INSERT OVERWRITE TABLE dest_j2 SELECT res.key, y.value, res.value
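Note on the join46.q.out and join_emit_interval.q.out diffs that follow: no operators are added or removed there; the hunks only show the planner's estimates recomputed now that test1 and test2 carry column statistics. Each TableScan flips from Column stats: NONE to COMPLETE, data sizes drop to values derived from actual column widths (test1: 1152 -> 572 bytes, test2: 768 -> 380), and join cardinalities shift (for example 6 -> 8 rows on the outer joins) because the estimator can use per-column NDV and null counts instead of falling back on raw-size heuristics. A quick way to inspect the gathered statistics behind those numbers, again as an illustration only:

-- Sketch: display the autogathered statistics for one join column
-- (min, max, num_nulls, distinct_count, and so on).
DESCRIBE FORMATTED test1 key;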
diff --git ql/src/test/results/clientpositive/llap/join46.q.out ql/src/test/results/clientpositive/llap/join46.q.out index 1f6415c98f..92280b5860 100644 --- ql/src/test/results/clientpositive/llap/join46.q.out +++ ql/src/test/results/clientpositive/llap/join46.q.out @@ -62,16 +62,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column
stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -79,16 +79,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -102,10 +102,10 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 6 Data size: 1267 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1524 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 1267 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1524 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -169,16 +169,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -186,19 +186,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator
predicate: key BETWEEN 100 AND 102 (type: boolean) - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -215,10 +215,10 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 6 Data size: 1267 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 857 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 1267 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 857 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -283,14 +283,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -298,17 +298,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key BETWEEN 100 AND 102 (type: boolean) - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -325,10 +325,10 @@ STAGE PLANS: 0 1 
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 6 Data size: 2310 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1142 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 2310 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1142 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -389,16 +389,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -406,16 +406,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -429,10 +429,10 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 6 Data size: 1267 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1524 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 1267 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1524 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -491,14 +491,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + 
Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -506,14 +506,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -530,10 +530,10 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -603,14 +603,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -618,14 +618,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), 
_col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -640,10 +640,10 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -716,14 +716,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -731,14 +731,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -753,10 +753,10 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102)} - Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -827,14 +827,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) 
outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -842,14 +842,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -864,10 +864,10 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {((_col1 = _col4) or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -934,16 +934,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -951,16 +951,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE 
Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -975,10 +975,10 @@ STAGE PLANS: 1 _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 6 Data size: 1267 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1524 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 1267 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1524 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1044,14 +1044,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1059,14 +1059,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1081,14 +1081,14 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {((_col1 = _col4) or (_col0 = _col3))} - Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE 
Column stats: NONE + Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1156,14 +1156,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1171,14 +1171,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1193,10 +1193,10 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1269,14 +1269,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap 
LLAP IO: no inputs @@ -1284,14 +1284,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1306,10 +1306,10 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102)} - Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1377,14 +1377,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1392,14 +1392,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1414,10 +1414,10 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {((_col1 = _col4) or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE Column 
stats: NONE + Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1486,16 +1486,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1503,16 +1503,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1527,10 +1527,10 @@ STAGE PLANS: 1 _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 6 Data size: 1267 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1524 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 1267 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1524 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1596,14 +1596,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: 
COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1611,14 +1611,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1633,10 +1633,10 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1709,14 +1709,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1724,14 +1724,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE 
value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1746,10 +1746,10 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102)} - Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1820,14 +1820,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1835,14 +1835,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1857,10 +1857,10 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {((_col1 = _col4) or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1929,16 +1929,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: 
int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1946,16 +1946,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1970,10 +1970,10 @@ STAGE PLANS: 1 _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 6 Data size: 1267 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1711 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 1267 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1711 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2071,26 +2071,26 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition 
columns: _col1 (type: int) - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -2098,26 +2098,26 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -2132,10 +2132,10 @@ STAGE PLANS: 1 _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 6 Data size: 1267 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1524 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 1267 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1524 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) Reducer 3 Execution mode: llap @@ -2148,10 +2148,10 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 residual filter predicates: {(_col1 is null or (_col10 is null and (_col7 <> _col4)))} - Statistics: Num rows: 36 Data size: 15240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 64 Data size: 24440 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 36 Data size: 15240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 64 Data size: 24440 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2167,10 +2167,10 @@ STAGE PLANS: 1 _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 
BETWEEN 100 AND 102)} - Statistics: Num rows: 6 Data size: 1267 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1524 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 1267 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1524 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) Stage: Stage-0 diff --git ql/src/test/results/clientpositive/llap/join_emit_interval.q.out ql/src/test/results/clientpositive/llap/join_emit_interval.q.out index bef673fe3f..fb897b0987 100644 --- ql/src/test/results/clientpositive/llap/join_emit_interval.q.out +++ ql/src/test/results/clientpositive/llap/join_emit_interval.q.out @@ -62,16 +62,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -79,16 +79,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -105,10 +105,10 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 6 Data size: 1267 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1524 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 1267 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1524 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -168,14 +168,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 
6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -183,14 +183,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -207,10 +207,10 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/lineage2.q.out ql/src/test/results/clientpositive/llap/lineage2.q.out index 24ce094de5..b3db244da1 100644 --- ql/src/test/results/clientpositive/llap/lineage2.q.out +++ ql/src/test/results/clientpositive/llap/lineage2.q.out @@ -36,7 +36,7 @@ PREHOOK: query: insert into table dest1 select * from src2 PREHOOK: type: QUERY PREHOOK: Input: default@src2 PREHOOK: Output: default@dest1 -{"version":"1.0","engine":"tez","database":"default","hash":"ecc718a966d8887b18084a55dd96f0bc","queryText":"insert into table dest1 select * from src2","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest1.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest1.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"ecc718a966d8887b18084a55dd96f0bc","queryText":"insert into table dest1 select * from src2","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[2],"targets":[0],"expression":"compute_stats(default.src2.key2, 
'hll')","edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"expression":"compute_stats(default.src2.value2, 'hll')","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest1.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest1.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} PREHOOK: query: select key k, dest1.value from dest1 PREHOOK: type: QUERY PREHOOK: Input: default@dest1 @@ -467,20 +467,20 @@ PREHOOK: type: QUERY PREHOOK: Input: default@src1 PREHOOK: Input: default@src2 PREHOOK: Output: default@dest2 -{"version":"1.0","engine":"tez","database":"default","hash":"e494b771d94800dc3430bf5d0810cd9f","queryText":"insert overwrite table dest2 select * from src1 JOIN src2 ON src1.key = src2.key2","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1,2,3],"expression":"src1.key is not null","edgeType":"PREDICATE"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2,3],"expression":"src2.key2 is not null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"e494b771d94800dc3430bf5d0810cd9f","queryText":"insert overwrite table dest2 select * from src1 JOIN src2 ON src1.key = src2.key2","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1,2,3],"expression":"src1.key is not null","edgeType":"PREDICATE"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2,3],"expression":"src2.key2 is not null","edgeType":"PREDICATE"},{"sources":[4],"targets":[0],"expression":"compute_stats(default.src1.key, 'hll')","edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"expression":"compute_stats(default.src1.value, 'hll')","edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"expression":"compute_stats(default.src2.key2, 'hll')","edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"expression":"compute_stats(default.src2.value2, 'hll')","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} PREHOOK: query: 
insert into table dest2 select * from src1 JOIN src2 ON src1.key = src2.key2 PREHOOK: type: QUERY PREHOOK: Input: default@src1 PREHOOK: Input: default@src2 PREHOOK: Output: default@dest2 -{"version":"1.0","engine":"tez","database":"default","hash":"efeaddd0d36105b1013b414627850dc2","queryText":"insert into table dest2 select * from src1 JOIN src2 ON src1.key = src2.key2","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1,2,3],"expression":"src1.key is not null","edgeType":"PREDICATE"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2,3],"expression":"src2.key2 is not null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"efeaddd0d36105b1013b414627850dc2","queryText":"insert into table dest2 select * from src1 JOIN src2 ON src1.key = src2.key2","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1,2,3],"expression":"src1.key is not null","edgeType":"PREDICATE"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2,3],"expression":"src2.key2 is not null","edgeType":"PREDICATE"},{"sources":[4],"targets":[0],"expression":"compute_stats(default.src1.key, 'hll')","edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"expression":"compute_stats(default.src1.value, 'hll')","edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"expression":"compute_stats(default.src2.key2, 'hll')","edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"expression":"compute_stats(default.src2.value2, 'hll')","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} PREHOOK: query: insert into table dest2 select * from src1 JOIN src2 ON length(src1.value) = length(src2.value2) + 1 PREHOOK: type: QUERY PREHOOK: Input: default@src1 PREHOOK: Input: default@src2 PREHOOK: Output: default@dest2 -{"version":"1.0","engine":"tez","database":"default","hash":"e9450a56b3d103642e06bef0e4f0d482","queryText":"insert into table dest2\n select * from src1 JOIN src2 ON length(src1.value) = length(src2.value2) + 
1","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[5],"targets":[0,1,2,3],"expression":"length(src1.value) is not null","edgeType":"PREDICATE"},{"sources":[5,7],"targets":[0,1,2,3],"expression":"(length(src1.value) = (length(src2.value2) + 1))","edgeType":"PREDICATE"},{"sources":[7],"targets":[0,1,2,3],"expression":"length(src2.value2) is not null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"e9450a56b3d103642e06bef0e4f0d482","queryText":"insert into table dest2\n select * from src1 JOIN src2 ON length(src1.value) = length(src2.value2) + 1","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[5],"targets":[0,1,2,3],"expression":"length(src1.value) is not null","edgeType":"PREDICATE"},{"sources":[5,7],"targets":[0,1,2,3],"expression":"(length(src1.value) = (length(src2.value2) + 1))","edgeType":"PREDICATE"},{"sources":[7],"targets":[0,1,2,3],"expression":"length(src2.value2) is not null","edgeType":"PREDICATE"},{"sources":[4],"targets":[0],"expression":"compute_stats(default.src1.key, 'hll')","edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"expression":"compute_stats(default.src1.value, 'hll')","edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"expression":"compute_stats(default.src2.key2, 'hll')","edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"expression":"compute_stats(default.src2.value2, 'hll')","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} PREHOOK: query: select * from src1 where length(key) > 2 PREHOOK: type: QUERY PREHOOK: Input: default@src1 @@ -530,7 +530,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@src1 PREHOOK: Input: default@src2 PREHOOK: Output: default@dest2 -{"version":"1.0","engine":"tez","database":"default","hash":"76d84512204ddc576ad4d93f252e4358","queryText":"insert overwrite table dest2\n select * from src1 JOIN src2 ON src1.key = src2.key2 WHERE length(key) > 
3","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1,2,3],"expression":"((length(src1.key) > 3) and src1.key is not null)","edgeType":"PREDICATE"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2,3],"expression":"((length(src2.key2) > 3) and src2.key2 is not null)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"76d84512204ddc576ad4d93f252e4358","queryText":"insert overwrite table dest2\n select * from src1 JOIN src2 ON src1.key = src2.key2 WHERE length(key) > 3","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1,2,3],"expression":"((length(src1.key) > 3) and src1.key is not null)","edgeType":"PREDICATE"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2,3],"expression":"((length(src2.key2) > 3) and src2.key2 is not null)","edgeType":"PREDICATE"},{"sources":[4],"targets":[0],"expression":"compute_stats(default.src1.key, 'hll')","edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"expression":"compute_stats(default.src1.value, 'hll')","edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"expression":"compute_stats(default.src2.key2, 'hll')","edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"expression":"compute_stats(default.src2.value2, 'hll')","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} PREHOOK: query: drop table if exists dest_l1 PREHOOK: type: DROPTABLE PREHOOK: query: CREATE TABLE dest_l1(key INT, value STRING) STORED AS TEXTFILE @@ -552,7 +552,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Input: default@src1 PREHOOK: Output: default@dest_l1 -{"version":"1.0","engine":"tez","database":"default","hash":"60b589744e2527dd235a6c8168d6a653","queryText":"INSERT OVERWRITE TABLE dest_l1\nSELECT j.*\nFROM (SELECT t1.key, p1.value\n FROM src1 t1\n LEFT OUTER JOIN src p1\n ON (t1.key = p1.key)\n UNION ALL\n SELECT t2.key, p2.value\n FROM src1 t2\n LEFT OUTER JOIN src p2\n ON (t2.key = p2.key)) 
j","edges":[{"sources":[2],"targets":[0],"expression":"UDFToInteger(j.key)","edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"expression":"j.value","edgeType":"PROJECTION"},{"sources":[4,2],"targets":[0,1],"expression":"(p1.key = t1.key)","edgeType":"PREDICATE"},{"sources":[4,2],"targets":[0,1],"expression":"(p2.key = t2.key)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l1.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l1.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src.value"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src.key"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"60b589744e2527dd235a6c8168d6a653","queryText":"INSERT OVERWRITE TABLE dest_l1\nSELECT j.*\nFROM (SELECT t1.key, p1.value\n FROM src1 t1\n LEFT OUTER JOIN src p1\n ON (t1.key = p1.key)\n UNION ALL\n SELECT t2.key, p2.value\n FROM src1 t2\n LEFT OUTER JOIN src p2\n ON (t2.key = p2.key)) j","edges":[{"sources":[2],"targets":[0],"expression":"UDFToInteger(j.key)","edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"expression":"j.value","edgeType":"PROJECTION"},{"sources":[4,2],"targets":[0,1],"expression":"(p1.key = t1.key)","edgeType":"PREDICATE"},{"sources":[4,2],"targets":[0,1],"expression":"(p2.key = t2.key)","edgeType":"PREDICATE"},{"sources":[2],"targets":[0],"expression":"compute_stats(UDFToInteger(j.key), 'hll')","edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"expression":"compute_stats(j.value, 'hll')","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l1.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l1.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src.value"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src.key"}]} PREHOOK: query: drop table if exists emp PREHOOK: type: DROPTABLE PREHOOK: query: drop table if exists dept @@ -593,7 +593,7 @@ PREHOOK: Input: default@dept PREHOOK: Input: default@emp PREHOOK: Input: default@project PREHOOK: Output: default@tgt -{"version":"1.0","engine":"tez","database":"default","hash":"f59797e0422d2e51515063374dfac361","queryText":"INSERT INTO TABLE tgt\nSELECT emd.dept_name, emd.name, emd.emp_id, emd.mgr_id, p.project_id, p.project_name\nFROM (\n SELECT d.dept_name, em.name, em.emp_id, em.mgr_id, em.dept_id\n FROM (\n SELECT e.name, e.dept_id, e.emp_id emp_id, m.emp_id mgr_id\n FROM emp e JOIN emp m ON e.emp_id = m.emp_id\n ) em\n JOIN dept d ON d.dept_id = em.dept_id\n ) emd JOIN project p ON emd.dept_id = p.project_id","edges":[{"sources":[6],"targets":[0],"edgeType":"PROJECTION"},{"sources":[7],"targets":[1],"edgeType":"PROJECTION"},{"sources":[8],"targets":[2,3],"edgeType":"PROJECTION"},{"sources":[9],"targets":[4],"edgeType":"PROJECTION"},{"sources":[10],"targets":[5],"edgeType":"PROJECTION"},{"sources":[8,11],"targets":[0,1,2,3,4,5],"expression":"(e.emp_id is not null and e.dept_id is not null)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3,4,5],"expression":"(e.emp_id = m.emp_id)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3,4,5],"expression":"m.emp_id is not null","edgeType":"PREDICATE"},{"sources":[11,12],"targets":[0,1,2,3,4,5],"expression":"(e.dept_id = d.dept_id)","edgeType":"PREDICATE"},{"sources":[12],"targets":[0,1,2,3,4,5],"expression":"d.dept_id is not 
null","edgeType":"PREDICATE"},{"sources":[11,9],"targets":[0,1,2,3,4,5],"expression":"(e.dept_id = p.project_id)","edgeType":"PREDICATE"},{"sources":[9],"targets":[0,1,2,3,4,5],"expression":"p.project_id is not null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.tgt.dept_name"},{"id":1,"vertexType":"COLUMN","vertexId":"default.tgt.name"},{"id":2,"vertexType":"COLUMN","vertexId":"default.tgt.emp_id"},{"id":3,"vertexType":"COLUMN","vertexId":"default.tgt.mgr_id"},{"id":4,"vertexType":"COLUMN","vertexId":"default.tgt.proj_id"},{"id":5,"vertexType":"COLUMN","vertexId":"default.tgt.proj_name"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dept.dept_name"},{"id":7,"vertexType":"COLUMN","vertexId":"default.emp.name"},{"id":8,"vertexType":"COLUMN","vertexId":"default.emp.emp_id"},{"id":9,"vertexType":"COLUMN","vertexId":"default.project.project_id"},{"id":10,"vertexType":"COLUMN","vertexId":"default.project.project_name"},{"id":11,"vertexType":"COLUMN","vertexId":"default.emp.dept_id"},{"id":12,"vertexType":"COLUMN","vertexId":"default.dept.dept_id"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"f59797e0422d2e51515063374dfac361","queryText":"INSERT INTO TABLE tgt\nSELECT emd.dept_name, emd.name, emd.emp_id, emd.mgr_id, p.project_id, p.project_name\nFROM (\n SELECT d.dept_name, em.name, em.emp_id, em.mgr_id, em.dept_id\n FROM (\n SELECT e.name, e.dept_id, e.emp_id emp_id, m.emp_id mgr_id\n FROM emp e JOIN emp m ON e.emp_id = m.emp_id\n ) em\n JOIN dept d ON d.dept_id = em.dept_id\n ) emd JOIN project p ON emd.dept_id = p.project_id","edges":[{"sources":[6],"targets":[0],"edgeType":"PROJECTION"},{"sources":[7],"targets":[1],"edgeType":"PROJECTION"},{"sources":[8],"targets":[2,3],"edgeType":"PROJECTION"},{"sources":[9],"targets":[4],"edgeType":"PROJECTION"},{"sources":[10],"targets":[5],"edgeType":"PROJECTION"},{"sources":[8,11],"targets":[0,1,2,3,4,5],"expression":"(e.emp_id is not null and e.dept_id is not null)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3,4,5],"expression":"(e.emp_id = m.emp_id)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3,4,5],"expression":"m.emp_id is not null","edgeType":"PREDICATE"},{"sources":[11,12],"targets":[0,1,2,3,4,5],"expression":"(e.dept_id = d.dept_id)","edgeType":"PREDICATE"},{"sources":[12],"targets":[0,1,2,3,4,5],"expression":"d.dept_id is not null","edgeType":"PREDICATE"},{"sources":[11,9],"targets":[0,1,2,3,4,5],"expression":"(e.dept_id = p.project_id)","edgeType":"PREDICATE"},{"sources":[9],"targets":[0,1,2,3,4,5],"expression":"p.project_id is not null","edgeType":"PREDICATE"},{"sources":[6],"targets":[0],"expression":"compute_stats(default.dept.dept_name, 'hll')","edgeType":"PROJECTION"},{"sources":[7],"targets":[1],"expression":"compute_stats(default.emp.name, 'hll')","edgeType":"PROJECTION"},{"sources":[8],"targets":[2,3],"expression":"compute_stats(default.emp.emp_id, 'hll')","edgeType":"PROJECTION"},{"sources":[9],"targets":[4],"expression":"compute_stats(default.project.project_id, 'hll')","edgeType":"PROJECTION"},{"sources":[10],"targets":[5],"expression":"compute_stats(default.project.project_name, 
'hll')","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.tgt.dept_name"},{"id":1,"vertexType":"COLUMN","vertexId":"default.tgt.name"},{"id":2,"vertexType":"COLUMN","vertexId":"default.tgt.emp_id"},{"id":3,"vertexType":"COLUMN","vertexId":"default.tgt.mgr_id"},{"id":4,"vertexType":"COLUMN","vertexId":"default.tgt.proj_id"},{"id":5,"vertexType":"COLUMN","vertexId":"default.tgt.proj_name"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dept.dept_name"},{"id":7,"vertexType":"COLUMN","vertexId":"default.emp.name"},{"id":8,"vertexType":"COLUMN","vertexId":"default.emp.emp_id"},{"id":9,"vertexType":"COLUMN","vertexId":"default.project.project_id"},{"id":10,"vertexType":"COLUMN","vertexId":"default.project.project_name"},{"id":11,"vertexType":"COLUMN","vertexId":"default.emp.dept_id"},{"id":12,"vertexType":"COLUMN","vertexId":"default.dept.dept_id"}]} PREHOOK: query: drop table if exists dest_l2 PREHOOK: type: DROPTABLE PREHOOK: query: create table dest_l2 (id int, c1 tinyint, c2 int, c3 bigint) stored as textfile @@ -603,7 +603,7 @@ PREHOOK: Output: default@dest_l2 PREHOOK: query: insert into dest_l2 values(0, 1, 100, 10000) PREHOOK: type: QUERY PREHOOK: Output: default@dest_l2 -{"version":"1.0","engine":"tez","database":"default","hash":"e001334e3f8384806b0f25a7c303045f","queryText":"insert into dest_l2 values(0, 1, 100, 10000)","edges":[{"sources":[],"targets":[0],"expression":"UDFToInteger(values__tmp__table__1.tmp_values_col1)","edgeType":"PROJECTION"},{"sources":[],"targets":[1],"expression":"UDFToByte(values__tmp__table__1.tmp_values_col2)","edgeType":"PROJECTION"},{"sources":[],"targets":[2],"expression":"UDFToInteger(values__tmp__table__1.tmp_values_col3)","edgeType":"PROJECTION"},{"sources":[],"targets":[3],"expression":"UDFToLong(values__tmp__table__1.tmp_values_col4)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l2.id"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l2.c1"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_l2.c2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_l2.c3"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"e001334e3f8384806b0f25a7c303045f","queryText":"insert into dest_l2 values(0, 1, 100, 10000)","edges":[{"sources":[],"targets":[0],"expression":"UDFToInteger(values__tmp__table__1.tmp_values_col1)","edgeType":"PROJECTION"},{"sources":[],"targets":[1],"expression":"UDFToByte(values__tmp__table__1.tmp_values_col2)","edgeType":"PROJECTION"},{"sources":[],"targets":[2],"expression":"UDFToInteger(values__tmp__table__1.tmp_values_col3)","edgeType":"PROJECTION"},{"sources":[],"targets":[3],"expression":"UDFToLong(values__tmp__table__1.tmp_values_col4)","edgeType":"PROJECTION"},{"sources":[],"targets":[0],"expression":"compute_stats(UDFToInteger(values__tmp__table__1.tmp_values_col1), 'hll')","edgeType":"PROJECTION"},{"sources":[],"targets":[1],"expression":"compute_stats(UDFToByte(values__tmp__table__1.tmp_values_col2), 'hll')","edgeType":"PROJECTION"},{"sources":[],"targets":[2],"expression":"compute_stats(UDFToInteger(values__tmp__table__1.tmp_values_col3), 'hll')","edgeType":"PROJECTION"},{"sources":[],"targets":[3],"expression":"compute_stats(UDFToLong(values__tmp__table__1.tmp_values_col4), 
'hll')","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l2.id"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l2.c1"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_l2.c2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_l2.c3"}]} PREHOOK: query: select * from ( select c1 + c2 x from dest_l2 union all @@ -623,7 +623,7 @@ PREHOOK: Output: default@dest_l3 PREHOOK: query: insert into dest_l3 values(0, "s1", "s2", 15) PREHOOK: type: QUERY PREHOOK: Output: default@dest_l3 -{"version":"1.0","engine":"tez","database":"default","hash":"09df51ba6ba2d07f2304523ee505f094","queryText":"insert into dest_l3 values(0, \"s1\", \"s2\", 15)","edges":[{"sources":[],"targets":[0],"expression":"UDFToInteger(values__tmp__table__2.tmp_values_col1)","edgeType":"PROJECTION"},{"sources":[],"targets":[1,2],"edgeType":"PROJECTION"},{"sources":[],"targets":[3],"expression":"UDFToInteger(values__tmp__table__2.tmp_values_col4)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l3.id"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l3.c1"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_l3.c2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_l3.c3"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"09df51ba6ba2d07f2304523ee505f094","queryText":"insert into dest_l3 values(0, \"s1\", \"s2\", 15)","edges":[{"sources":[],"targets":[0],"expression":"UDFToInteger(values__tmp__table__2.tmp_values_col1)","edgeType":"PROJECTION"},{"sources":[],"targets":[1,2],"edgeType":"PROJECTION"},{"sources":[],"targets":[3],"expression":"UDFToInteger(values__tmp__table__2.tmp_values_col4)","edgeType":"PROJECTION"},{"sources":[],"targets":[0],"expression":"compute_stats(UDFToInteger(values__tmp__table__2.tmp_values_col1), 'hll')","edgeType":"PROJECTION"},{"sources":[],"targets":[1],"expression":"compute_stats(default.values__tmp__table__2.tmp_values_col2, 'hll')","edgeType":"PROJECTION"},{"sources":[],"targets":[2],"expression":"compute_stats(default.values__tmp__table__2.tmp_values_col3, 'hll')","edgeType":"PROJECTION"},{"sources":[],"targets":[3],"expression":"compute_stats(UDFToInteger(values__tmp__table__2.tmp_values_col4), 'hll')","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l3.id"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l3.c1"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_l3.c2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_l3.c3"}]} PREHOOK: query: select sum(a.c1) over (partition by a.c1 order by a.id) from dest_l2 a where a.c2 != 10 @@ -702,4 +702,4 @@ from relations lateral view explode(ep1_ids) rel1 as ep1_id PREHOOK: type: QUERY PREHOOK: Input: default@relations PREHOOK: Output: default@rels_exploded -{"version":"1.0","engine":"tez","database":"default","hash":"e76d2efade744d1d5cf74fda064ba6c6","queryText":"insert into rels_exploded select identity, type,\n ep1_src_type, ep1_type, ep2_src_type, ep2_type, ep1_id, ep2_id\nfrom relations lateral view explode(ep1_ids) rel1 as ep1_id\n lateral view explode (ep2_ids) rel2 as 
ep2_id","edges":[{"sources":[8],"targets":[0],"edgeType":"PROJECTION"},{"sources":[9],"targets":[1],"edgeType":"PROJECTION"},{"sources":[10],"targets":[2],"edgeType":"PROJECTION"},{"sources":[11],"targets":[3],"edgeType":"PROJECTION"},{"sources":[12],"targets":[4],"edgeType":"PROJECTION"},{"sources":[13],"targets":[5],"edgeType":"PROJECTION"},{"sources":[14],"targets":[6],"expression":"CAST( rel1._col11 AS CHAR(32)","edgeType":"PROJECTION"},{"sources":[15],"targets":[7],"expression":"CAST( rel2._col12 AS CHAR(32)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.rels_exploded.identity"},{"id":1,"vertexType":"COLUMN","vertexId":"default.rels_exploded.type"},{"id":2,"vertexType":"COLUMN","vertexId":"default.rels_exploded.ep1_src_type"},{"id":3,"vertexType":"COLUMN","vertexId":"default.rels_exploded.ep1_type"},{"id":4,"vertexType":"COLUMN","vertexId":"default.rels_exploded.ep2_src_type"},{"id":5,"vertexType":"COLUMN","vertexId":"default.rels_exploded.ep2_type"},{"id":6,"vertexType":"COLUMN","vertexId":"default.rels_exploded.ep1_id"},{"id":7,"vertexType":"COLUMN","vertexId":"default.rels_exploded.ep2_id"},{"id":8,"vertexType":"COLUMN","vertexId":"default.relations.identity"},{"id":9,"vertexType":"COLUMN","vertexId":"default.relations.type"},{"id":10,"vertexType":"COLUMN","vertexId":"default.relations.ep1_src_type"},{"id":11,"vertexType":"COLUMN","vertexId":"default.relations.ep1_type"},{"id":12,"vertexType":"COLUMN","vertexId":"default.relations.ep2_src_type"},{"id":13,"vertexType":"COLUMN","vertexId":"default.relations.ep2_type"},{"id":14,"vertexType":"COLUMN","vertexId":"default.relations.ep1_ids"},{"id":15,"vertexType":"COLUMN","vertexId":"default.relations.ep2_ids"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"e76d2efade744d1d5cf74fda064ba6c6","queryText":"insert into rels_exploded select identity, type,\n ep1_src_type, ep1_type, ep2_src_type, ep2_type, ep1_id, ep2_id\nfrom relations lateral view explode(ep1_ids) rel1 as ep1_id\n lateral view explode (ep2_ids) rel2 as ep2_id","edges":[{"sources":[8],"targets":[0],"edgeType":"PROJECTION"},{"sources":[9],"targets":[1],"edgeType":"PROJECTION"},{"sources":[10],"targets":[2],"edgeType":"PROJECTION"},{"sources":[11],"targets":[3],"edgeType":"PROJECTION"},{"sources":[12],"targets":[4],"edgeType":"PROJECTION"},{"sources":[13],"targets":[5],"edgeType":"PROJECTION"},{"sources":[14],"targets":[6],"expression":"CAST( rel1._col11 AS CHAR(32)","edgeType":"PROJECTION"},{"sources":[15],"targets":[7],"expression":"CAST( rel2._col12 AS CHAR(32)","edgeType":"PROJECTION"},{"sources":[8],"targets":[0],"expression":"compute_stats(default.relations.identity, 'hll')","edgeType":"PROJECTION"},{"sources":[9],"targets":[1],"expression":"compute_stats(default.relations.type, 'hll')","edgeType":"PROJECTION"},{"sources":[10],"targets":[2],"expression":"compute_stats(default.relations.ep1_src_type, 'hll')","edgeType":"PROJECTION"},{"sources":[11],"targets":[3],"expression":"compute_stats(default.relations.ep1_type, 'hll')","edgeType":"PROJECTION"},{"sources":[12],"targets":[4],"expression":"compute_stats(default.relations.ep2_src_type, 'hll')","edgeType":"PROJECTION"},{"sources":[13],"targets":[5],"expression":"compute_stats(default.relations.ep2_type, 'hll')","edgeType":"PROJECTION"},{"sources":[14],"targets":[6],"expression":"compute_stats(CAST( rel1._col11 AS CHAR(32), 'hll')","edgeType":"PROJECTION"},{"sources":[15],"targets":[7],"expression":"compute_stats(CAST( rel2._col12 AS CHAR(32), 
'hll')","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.rels_exploded.identity"},{"id":1,"vertexType":"COLUMN","vertexId":"default.rels_exploded.type"},{"id":2,"vertexType":"COLUMN","vertexId":"default.rels_exploded.ep1_src_type"},{"id":3,"vertexType":"COLUMN","vertexId":"default.rels_exploded.ep1_type"},{"id":4,"vertexType":"COLUMN","vertexId":"default.rels_exploded.ep2_src_type"},{"id":5,"vertexType":"COLUMN","vertexId":"default.rels_exploded.ep2_type"},{"id":6,"vertexType":"COLUMN","vertexId":"default.rels_exploded.ep1_id"},{"id":7,"vertexType":"COLUMN","vertexId":"default.rels_exploded.ep2_id"},{"id":8,"vertexType":"COLUMN","vertexId":"default.relations.identity"},{"id":9,"vertexType":"COLUMN","vertexId":"default.relations.type"},{"id":10,"vertexType":"COLUMN","vertexId":"default.relations.ep1_src_type"},{"id":11,"vertexType":"COLUMN","vertexId":"default.relations.ep1_type"},{"id":12,"vertexType":"COLUMN","vertexId":"default.relations.ep2_src_type"},{"id":13,"vertexType":"COLUMN","vertexId":"default.relations.ep2_type"},{"id":14,"vertexType":"COLUMN","vertexId":"default.relations.ep1_ids"},{"id":15,"vertexType":"COLUMN","vertexId":"default.relations.ep2_ids"}]} diff --git ql/src/test/results/clientpositive/llap/lineage3.q.out ql/src/test/results/clientpositive/llap/lineage3.q.out index 66cc6ad5a0..d2a422a481 100644 --- ql/src/test/results/clientpositive/llap/lineage3.q.out +++ ql/src/test/results/clientpositive/llap/lineage3.q.out @@ -10,7 +10,7 @@ insert into table d1 select x + length(y) PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc PREHOOK: Output: default@d1 -{"version":"1.0","engine":"tez","database":"default","hash":"4c9b7b8d89403cef78668f15d393e542","queryText":"from (select a.ctinyint x, b.cstring1 y\nfrom alltypesorc a join alltypesorc b on a.cint = b.cbigint) t\ninsert into table d1 select x + length(y)","edges":[{"sources":[1,2],"targets":[0],"expression":"(UDFToInteger(a.ctinyint) + length(b.cstring1))","edgeType":"PROJECTION"},{"sources":[3],"targets":[0],"expression":"a.cint is not null","edgeType":"PREDICATE"},{"sources":[3,4],"targets":[0],"expression":"(UDFToLong(a.cint) = b.cbigint)","edgeType":"PREDICATE"},{"sources":[4],"targets":[0],"expression":"b.cbigint is not null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.d1.a"},{"id":1,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"},{"id":2,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cbigint"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"4c9b7b8d89403cef78668f15d393e542","queryText":"from (select a.ctinyint x, b.cstring1 y\nfrom alltypesorc a join alltypesorc b on a.cint = b.cbigint) t\ninsert into table d1 select x + length(y)","edges":[{"sources":[1,2],"targets":[0],"expression":"(UDFToInteger(a.ctinyint) + length(b.cstring1))","edgeType":"PROJECTION"},{"sources":[3],"targets":[0],"expression":"a.cint is not null","edgeType":"PREDICATE"},{"sources":[3,4],"targets":[0],"expression":"(UDFToLong(a.cint) = b.cbigint)","edgeType":"PREDICATE"},{"sources":[4],"targets":[0],"expression":"b.cbigint is not null","edgeType":"PREDICATE"},{"sources":[1,2],"targets":[0],"expression":"compute_stats((UDFToInteger(a.ctinyint) + length(b.cstring1)), 
'hll')","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.d1.a"},{"id":1,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"},{"id":2,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cbigint"}]} PREHOOK: query: drop table if exists d2 PREHOOK: type: DROPTABLE PREHOOK: query: create table d2(b varchar(128)) @@ -25,7 +25,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc PREHOOK: Output: default@d1 PREHOOK: Output: default@d2 -{"version":"1.0","engine":"tez","database":"default","hash":"8703e4091ebd4c96afd3cac83e3a2957","queryText":"from (select a.ctinyint x, b.cstring1 y\nfrom alltypesorc a join alltypesorc b on a.cint = b.cbigint) t\ninsert into table d1 select x where y is null\ninsert into table d2 select y where x > 0","edges":[{"sources":[2],"targets":[0],"expression":"UDFToInteger(x)","edgeType":"PROJECTION"},{"sources":[3],"targets":[0,1],"expression":"a.cint is not null","edgeType":"PREDICATE"},{"sources":[3,4],"targets":[0,1],"expression":"(UDFToLong(a.cint) = b.cbigint)","edgeType":"PREDICATE"},{"sources":[4],"targets":[0,1],"expression":"b.cbigint is not null","edgeType":"PREDICATE"},{"sources":[5],"targets":[0],"expression":"t.y is null","edgeType":"PREDICATE"},{"sources":[5],"targets":[1],"expression":"CAST( y AS varchar(128))","edgeType":"PROJECTION"},{"sources":[2],"targets":[1],"expression":"(t.x > 0)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.d1.a"},{"id":1,"vertexType":"COLUMN","vertexId":"default.d2.b"},{"id":2,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cbigint"},{"id":5,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"8703e4091ebd4c96afd3cac83e3a2957","queryText":"from (select a.ctinyint x, b.cstring1 y\nfrom alltypesorc a join alltypesorc b on a.cint = b.cbigint) t\ninsert into table d1 select x where y is null\ninsert into table d2 select y where x > 0","edges":[{"sources":[2],"targets":[0],"expression":"UDFToInteger(x)","edgeType":"PROJECTION"},{"sources":[3],"targets":[0,1],"expression":"a.cint is not null","edgeType":"PREDICATE"},{"sources":[3,4],"targets":[0,1],"expression":"(UDFToLong(a.cint) = b.cbigint)","edgeType":"PREDICATE"},{"sources":[4],"targets":[0,1],"expression":"b.cbigint is not null","edgeType":"PREDICATE"},{"sources":[5],"targets":[0],"expression":"t.y is null","edgeType":"PREDICATE"},{"sources":[5],"targets":[1],"expression":"CAST( y AS varchar(128))","edgeType":"PROJECTION"},{"sources":[2],"targets":[1],"expression":"(t.x > 0)","edgeType":"PREDICATE"},{"sources":[2],"targets":[0],"expression":"compute_stats(UDFToInteger(x), 'hll')","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.d1.a"},{"id":1,"vertexType":"COLUMN","vertexId":"default.d2.b"},{"id":2,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cbigint"},{"id":5,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"}]} PREHOOK: query: drop table if exists t PREHOOK: type: DROPTABLE PREHOOK: query: create table t as @@ -51,7 
+51,7 @@ where cint is not null and cint < 0 order by cint, cs limit 5 PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc PREHOOK: Output: default@dest_l1@ds=today -{"version":"1.0","engine":"tez","database":"default","hash":"2b5891d094ff74e23ec6acf5b4990f45","queryText":"insert into table dest_l1 partition (ds='today')\nselect cint, cast(cstring1 as varchar(128)) as cs\nfrom alltypesorc\nwhere cint is not null and cint < 0 order by cint, cs limit 5","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"expression":"CAST( alltypesorc.cstring1 AS varchar(128))","edgeType":"PROJECTION"},{"sources":[2],"targets":[0,1],"expression":"(alltypesorc.cint < 0)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l1.a"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l1.b"},{"id":2,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"2b5891d094ff74e23ec6acf5b4990f45","queryText":"insert into table dest_l1 partition (ds='today')\nselect cint, cast(cstring1 as varchar(128)) as cs\nfrom alltypesorc\nwhere cint is not null and cint < 0 order by cint, cs limit 5","edges":[{"sources":[3],"targets":[0],"edgeType":"PROJECTION"},{"sources":[4],"targets":[1],"expression":"CAST( alltypesorc.cstring1 AS varchar(128))","edgeType":"PROJECTION"},{"sources":[3],"targets":[0,1,2],"expression":"(alltypesorc.cint < 0)","edgeType":"PREDICATE"},{"sources":[3],"targets":[0],"expression":"compute_stats(default.alltypesorc.cint, 'hll')","edgeType":"PROJECTION"},{"sources":[4],"targets":[1],"expression":"compute_stats(CAST( alltypesorc.cstring1 AS varchar(128)), 'hll')","edgeType":"PROJECTION"},{"sources":[],"targets":[2],"expression":"'today'","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l1.a"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l1.b"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_l1.ds"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"}]} PREHOOK: query: insert into table dest_l1 partition (ds='tomorrow') select min(cint), cast(min(cstring1) as varchar(128)) as cs from alltypesorc @@ -61,7 +61,7 @@ having min(cbigint) > 10 PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc PREHOOK: Output: default@dest_l1@ds=tomorrow -{"version":"1.0","engine":"tez","database":"default","hash":"4ad6338a8abfe3fe0342198fcbd1f11d","queryText":"insert into table dest_l1 partition (ds='tomorrow')\nselect min(cint), cast(min(cstring1) as varchar(128)) as cs\nfrom alltypesorc\nwhere cint is not null and cboolean1 = true\ngroup by csmallint\nhaving min(cbigint) > 10","edges":[{"sources":[2],"targets":[0],"expression":"min(default.alltypesorc.cint)","edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"expression":"CAST( min(default.alltypesorc.cstring1) AS varchar(128))","edgeType":"PROJECTION"},{"sources":[4,2],"targets":[0,1],"expression":"(alltypesorc.cboolean1 and alltypesorc.cint is not null)","edgeType":"PREDICATE"},{"sources":[5],"targets":[0,1],"expression":"(min(default.alltypesorc.cbigint) > 
10)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l1.a"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l1.b"},{"id":2,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cboolean1"},{"id":5,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cbigint"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"4ad6338a8abfe3fe0342198fcbd1f11d","queryText":"insert into table dest_l1 partition (ds='tomorrow')\nselect min(cint), cast(min(cstring1) as varchar(128)) as cs\nfrom alltypesorc\nwhere cint is not null and cboolean1 = true\ngroup by csmallint\nhaving min(cbigint) > 10","edges":[{"sources":[3],"targets":[0],"expression":"min(default.alltypesorc.cint)","edgeType":"PROJECTION"},{"sources":[4],"targets":[1],"expression":"CAST( min(default.alltypesorc.cstring1) AS varchar(128))","edgeType":"PROJECTION"},{"sources":[5,3],"targets":[0,1,2],"expression":"(alltypesorc.cboolean1 and alltypesorc.cint is not null)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2],"expression":"(min(default.alltypesorc.cbigint) > 10)","edgeType":"PREDICATE"},{"sources":[3],"targets":[0],"expression":"compute_stats(min(default.alltypesorc.cint), 'hll')","edgeType":"PROJECTION"},{"sources":[4],"targets":[1],"expression":"compute_stats(CAST( min(default.alltypesorc.cstring1) AS varchar(128)), 'hll')","edgeType":"PROJECTION"},{"sources":[],"targets":[2],"expression":"'tomorrow'","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l1.a"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l1.b"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_l1.ds"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"},{"id":5,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cboolean1"},{"id":6,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cbigint"}]} PREHOOK: query: select cint, rank() over(order by cint) from alltypesorc where cint > 10 and cint < 10000 limit 10 PREHOOK: type: QUERY @@ -348,21 +348,23 @@ PREHOOK: query: insert into dest_dp1 partition (year) select first, word, year f PREHOOK: type: QUERY PREHOOK: Input: default@src_dp PREHOOK: Output: default@dest_dp1 -{"version":"1.0","engine":"tez","database":"default","hash":"b2d38401a3281e74a003d9650df97060","queryText":"insert into dest_dp1 partition (year) select first, word, year from src_dp","edges":[{"sources":[3],"targets":[0],"edgeType":"PROJECTION"},{"sources":[4],"targets":[1],"edgeType":"PROJECTION"},{"sources":[5],"targets":[2],"edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_dp1.first"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_dp1.word"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_dp1.year"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src_dp.first"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src_dp.word"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src_dp.year"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"b2d38401a3281e74a003d9650df97060","queryText":"insert into dest_dp1 partition (year) select first, word, year from 
src_dp","edges":[{"sources":[6],"targets":[0],"edgeType":"PROJECTION"},{"sources":[7],"targets":[1],"edgeType":"PROJECTION"},{"sources":[8],"targets":[2,3],"edgeType":"PROJECTION"},{"sources":[6],"targets":[4],"expression":"compute_stats(default.src_dp.first, 'hll')","edgeType":"PROJECTION"},{"sources":[7],"targets":[5],"expression":"compute_stats(default.src_dp.word, 'hll')","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_dp1.first"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_dp1.word"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_dp1.year"},{"id":3,"vertexType":"COLUMN","vertexId":"year"},{"id":4,"vertexType":"COLUMN","vertexId":"first"},{"id":5,"vertexType":"COLUMN","vertexId":"word"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src_dp.first"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src_dp.word"},{"id":8,"vertexType":"COLUMN","vertexId":"default.src_dp.year"}]} PREHOOK: query: insert into dest_dp2 partition (y, m) select first, word, year, month from src_dp PREHOOK: type: QUERY PREHOOK: Input: default@src_dp PREHOOK: Output: default@dest_dp2 -{"version":"1.0","engine":"tez","database":"default","hash":"237302d8ffd62b5b71d9544b22de7770","queryText":"insert into dest_dp2 partition (y, m) select first, word, year, month from src_dp","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_dp2.first"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_dp2.word"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_dp2.y"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_dp2.m"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src_dp.first"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src_dp.word"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src_dp.year"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src_dp.month"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"237302d8ffd62b5b71d9544b22de7770","queryText":"insert into dest_dp2 partition (y, m) select first, word, year, month from src_dp","edges":[{"sources":[8],"targets":[0],"edgeType":"PROJECTION"},{"sources":[9],"targets":[1],"edgeType":"PROJECTION"},{"sources":[10],"targets":[2,3],"edgeType":"PROJECTION"},{"sources":[11],"targets":[4,5],"edgeType":"PROJECTION"},{"sources":[8],"targets":[6],"expression":"compute_stats(default.src_dp.first, 'hll')","edgeType":"PROJECTION"},{"sources":[9],"targets":[7],"expression":"compute_stats(default.src_dp.word, 'hll')","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_dp2.first"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_dp2.word"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_dp2.y"},{"id":3,"vertexType":"COLUMN","vertexId":"year"},{"id":4,"vertexType":"COLUMN","vertexId":"default.dest_dp2.m"},{"id":5,"vertexType":"COLUMN","vertexId":"month"},{"id":6,"vertexType":"COLUMN","vertexId":"first"},{"id":7,"vertexType":"COLUMN","vertexId":"word"},{"id":8,"vertexType":"COLUMN","vertexId":"default.src_dp.first"},{"id":9,"vertexType":"COLUMN","vertexId":"default.src_dp.word"},{"id":10,"vertexType":"COLUMN","vertexId":"default.src_dp.year"},{"id":11,"vertexType":"COLUMN","vertexId":"default.src_dp.month"}]} PREHOOK: query: insert into dest_dp2 partition (y=0, m) select first, word, month 
from src_dp where year=0
PREHOOK: type: QUERY
PREHOOK: Input: default@src_dp
PREHOOK: Output: default@dest_dp2@y=0
+Result schema has 3 fields, but we don't get as many dependencies
{"version":"1.0","engine":"tez","database":"default","hash":"63e990b47e7ab4eb6f2ea09dfb7453ff","queryText":"insert into dest_dp2 partition (y=0, m) select first, word, month from src_dp where year=0","edges":[{"sources":[3],"targets":[0],"edgeType":"PROJECTION"},{"sources":[4],"targets":[1],"edgeType":"PROJECTION"},{"sources":[5],"targets":[2],"edgeType":"PROJECTION"},{"sources":[6],"targets":[0,1,2],"expression":"(src_dp.year = 0)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_dp2.first"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_dp2.word"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_dp2.m"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src_dp.first"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src_dp.word"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src_dp.month"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src_dp.year"}]}
PREHOOK: query: insert into dest_dp3 partition (y=0, m, d) select first, word, month m, day d from src_dp where year=0
PREHOOK: type: QUERY
PREHOOK: Input: default@src_dp
PREHOOK: Output: default@dest_dp3@y=0
+Result schema has 4 fields, but we don't get as many dependencies
{"version":"1.0","engine":"tez","database":"default","hash":"6bf71a9d02c0612c63b6f40b15c1e8b3","queryText":"insert into dest_dp3 partition (y=0, m, d) select first, word, month m, day d from src_dp where year=0","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[8],"targets":[0,1,2,3],"expression":"(src_dp.year = 0)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_dp3.first"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_dp3.word"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_dp3.m"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_dp3.d"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src_dp.first"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src_dp.word"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src_dp.month"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src_dp.day"},{"id":8,"vertexType":"COLUMN","vertexId":"default.src_dp.year"}]}
PREHOOK: query: drop table if exists src_dp1
PREHOOK: type: DROPTABLE
@@ -385,4 +387,4 @@ PREHOOK: Output: default@dest_dp1@year=0
PREHOOK: Output: default@dest_dp2
PREHOOK: Output: default@dest_dp2@y=1
PREHOOK: Output: default@dest_dp3@y=2
-{"version":"1.0","engine":"tez","database":"default","hash":"44f16edbf35cfeaf3d4f7b0113a69b74","queryText":"from src_dp, src_dp1\ninsert into dest_dp1 partition (year) select first, word, year\ninsert into dest_dp2 partition (y, m) select first, word, year, month\ninsert into dest_dp3 partition (y=2, m, d) select first, word, month m, day d where year=2\ninsert into dest_dp2 partition (y=1, m) select f, w\ninsert into dest_dp1 partition (year=0) select f, 
w","edges":[{"sources":[11],"targets":[0,1,2],"edgeType":"PROJECTION"},{"sources":[12],"targets":[3,4,5],"edgeType":"PROJECTION"},{"sources":[13],"targets":[6,7],"edgeType":"PROJECTION"},{"sources":[14],"targets":[8,9],"edgeType":"PROJECTION"},{"sources":[15],"targets":[1,0],"edgeType":"PROJECTION"},{"sources":[16],"targets":[4,3],"edgeType":"PROJECTION"},{"sources":[17],"targets":[8],"edgeType":"PROJECTION"},{"sources":[18],"targets":[10],"edgeType":"PROJECTION"},{"sources":[13],"targets":[2,5,9,10],"expression":"(subq.col7 = 2)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_dp1.first"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_dp2.first"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_dp3.first"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_dp1.word"},{"id":4,"vertexType":"COLUMN","vertexId":"default.dest_dp2.word"},{"id":5,"vertexType":"COLUMN","vertexId":"default.dest_dp3.word"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dest_dp1.year"},{"id":7,"vertexType":"COLUMN","vertexId":"default.dest_dp2.y"},{"id":8,"vertexType":"COLUMN","vertexId":"default.dest_dp2.m"},{"id":9,"vertexType":"COLUMN","vertexId":"default.dest_dp3.m"},{"id":10,"vertexType":"COLUMN","vertexId":"default.dest_dp3.d"},{"id":11,"vertexType":"COLUMN","vertexId":"default.src_dp.first"},{"id":12,"vertexType":"COLUMN","vertexId":"default.src_dp.word"},{"id":13,"vertexType":"COLUMN","vertexId":"default.src_dp.year"},{"id":14,"vertexType":"COLUMN","vertexId":"default.src_dp.month"},{"id":15,"vertexType":"COLUMN","vertexId":"default.src_dp1.f"},{"id":16,"vertexType":"COLUMN","vertexId":"default.src_dp1.w"},{"id":17,"vertexType":"COLUMN","vertexId":"default.src_dp1.m"},{"id":18,"vertexType":"COLUMN","vertexId":"default.src_dp.day"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"44f16edbf35cfeaf3d4f7b0113a69b74","queryText":"from src_dp, src_dp1\ninsert into dest_dp1 partition (year) select first, word, year\ninsert into dest_dp2 partition (y, m) select first, word, year, month\ninsert into dest_dp3 partition (y=2, m, d) select first, word, month m, day d where year=2\ninsert into dest_dp2 partition (y=1, m) select f, w, m\ninsert into dest_dp1 partition (year=0) select f, w","edges":[{"sources":[11],"targets":[0,1,2],"edgeType":"PROJECTION"},{"sources":[12],"targets":[3,4,5],"edgeType":"PROJECTION"},{"sources":[13],"targets":[6,7],"edgeType":"PROJECTION"},{"sources":[14],"targets":[8,9],"edgeType":"PROJECTION"},{"sources":[15],"targets":[1,0],"edgeType":"PROJECTION"},{"sources":[16],"targets":[4,3],"edgeType":"PROJECTION"},{"sources":[17],"targets":[8],"edgeType":"PROJECTION"},{"sources":[18],"targets":[10],"edgeType":"PROJECTION"},{"sources":[13],"targets":[2,5,9,10],"expression":"(subq.col7 = 2)","edgeType":"PREDICATE"},{"sources":[11],"targets":[0],"expression":"compute_stats(default.src_dp.first, 'hll')","edgeType":"PROJECTION"},{"sources":[12],"targets":[3],"expression":"compute_stats(default.src_dp.word, 
'hll')","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_dp1.first"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_dp2.first"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_dp3.first"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_dp1.word"},{"id":4,"vertexType":"COLUMN","vertexId":"default.dest_dp2.word"},{"id":5,"vertexType":"COLUMN","vertexId":"default.dest_dp3.word"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dest_dp1.year"},{"id":7,"vertexType":"COLUMN","vertexId":"default.dest_dp2.y"},{"id":8,"vertexType":"COLUMN","vertexId":"default.dest_dp2.m"},{"id":9,"vertexType":"COLUMN","vertexId":"default.dest_dp3.m"},{"id":10,"vertexType":"COLUMN","vertexId":"default.dest_dp3.d"},{"id":11,"vertexType":"COLUMN","vertexId":"default.src_dp.first"},{"id":12,"vertexType":"COLUMN","vertexId":"default.src_dp.word"},{"id":13,"vertexType":"COLUMN","vertexId":"default.src_dp.year"},{"id":14,"vertexType":"COLUMN","vertexId":"default.src_dp.month"},{"id":15,"vertexType":"COLUMN","vertexId":"default.src_dp1.f"},{"id":16,"vertexType":"COLUMN","vertexId":"default.src_dp1.w"},{"id":17,"vertexType":"COLUMN","vertexId":"default.src_dp1.m"},{"id":18,"vertexType":"COLUMN","vertexId":"default.src_dp.day"}]} diff --git ql/src/test/results/clientpositive/llap/list_bucket_dml_10.q.out ql/src/test/results/clientpositive/llap/list_bucket_dml_10.q.out index edbf76d935..2bf96d0aae 100644 --- ql/src/test/results/clientpositive/llap/list_bucket_dml_10.q.out +++ ql/src/test/results/clientpositive/llap/list_bucket_dml_10.q.out @@ -32,6 +32,9 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -73,6 +76,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 500 Data size: 179000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: true Execution mode: llap LLAP IO: no inputs Path -> Alias: @@ -126,6 +148,42 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [src] + Reducer 2 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1060 
Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -162,6 +220,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_static_part + Is Table Level Stats: false PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') select key, value from src @@ -203,7 +266,7 @@ Database: default Table: list_bucketing_static_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 4 numRows 500 rawDataSize 4812 diff --git ql/src/test/results/clientpositive/llap/llap_partitioned.q.out ql/src/test/results/clientpositive/llap/llap_partitioned.q.out index 59518e5cbd..ac754674ea 100644 --- ql/src/test/results/clientpositive/llap/llap_partitioned.q.out +++ ql/src/test/results/clientpositive/llap/llap_partitioned.q.out @@ -1634,7 +1634,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: oft - Statistics: Num rows: 12288 Data size: 13243096 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 12288 Data size: 1884148 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:csmallint:smallint, 1:cint:int, 2:cbigint:bigint, 3:cfloat:float, 4:cdouble:double, 5:cstring1:string, 6:cchar1:char(255), 7:cvchar1:varchar(255), 8:cboolean1:boolean, 9:cboolean2:boolean, 10:ctinyint:tinyint, 11:ROW__ID:struct] @@ -1655,7 +1655,7 @@ STAGE PLANS: outputColumnNames: _col1, _col6, _col7, _col10 input vertices: 1 Map 2 - Statistics: Num rows: 960 Data size: 240494 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 960 Data size: 4040 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col10 (type: tinyint), _col1 (type: int), _col6 (type: char(255)), _col7 (type: varchar(255)) outputColumnNames: _col0, _col1, _col2, _col3 @@ -1663,13 +1663,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [10, 1, 6, 7] - Statistics: Num rows: 960 Data size: 240494 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 960 Data size: 4040 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 960 Data size: 240494 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 960 Data size: 4040 Basic stats: COMPLETE Column stats: COMPLETE table: input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/llap_smb.q.out ql/src/test/results/clientpositive/llap/llap_smb.q.out index c3047da75b..44d78f5e59 100644 --- ql/src/test/results/clientpositive/llap/llap_smb.q.out +++ ql/src/test/results/clientpositive/llap/llap_smb.q.out @@ -215,18 +215,18 @@ STAGE PLANS: TableScan alias: b filterExpr: id is not null (type: boolean) - Statistics: Num rows: 200 Data size: 1528 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 200 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: id is not null (type: boolean) - Statistics: Num rows: 190 Data size: 1451 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 200 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a filterExpr: id is not null (type: boolean) - Statistics: Num rows: 5000 Data size: 158008 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 5000 Data size: 80000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: id is not null (type: boolean) - Statistics: Num rows: 4750 Data size: 74104 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 5000 Data size: 80000 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 @@ -234,18 +234,18 @@ STAGE PLANS: 0 id (type: bigint) 1 id (type: bigint) outputColumnNames: _col2, _col3 - Statistics: Num rows: 5225 Data size: 81514 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 988 Data size: 7904 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col2 (type: int), _col3 (type: smallint) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5225 Data size: 81514 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: smallint) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: smallint) - Statistics: Num rows: 5225 Data size: 81514 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap Reducer 2 @@ -256,10 +256,10 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: smallint) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2612 Data size: 40749 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2612 Data size: 40749 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/llap_stats.q.out ql/src/test/results/clientpositive/llap/llap_stats.q.out index 299d9b4dbb..e1d302a4cb 100644 --- ql/src/test/results/clientpositive/llap/llap_stats.q.out +++ ql/src/test/results/clientpositive/llap/llap_stats.q.out @@ -107,11 +107,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: llap_stats - Statistics: Num rows: 10 Data size: 196 Basic stats: 
COMPLETE Column stats: PARTIAL + Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int) outputColumnNames: ctinyint, csmallint, cint - Statistics: Num rows: 10 Data size: 196 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: compute_stats(ctinyint, 'hll'), compute_stats(csmallint, 'hll') keys: cint (type: int) diff --git ql/src/test/results/clientpositive/llap/load_dyn_part1.q.out ql/src/test/results/clientpositive/llap/load_dyn_part1.q.out index 2b89d24027..2be75e0f2c 100644 --- ql/src/test/results/clientpositive/llap/load_dyn_part1.q.out +++ ql/src/test/results/clientpositive/llap/load_dyn_part1.q.out @@ -63,6 +63,10 @@ STAGE PLANS: Stage: Stage-2 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -84,6 +88,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 666 Data size: 363636 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct) Filter Operator predicate: (ds > '2008-04-08') (type: boolean) Statistics: Num rows: 666 Data size: 363636 Basic stats: COMPLETE Column stats: COMPLETE @@ -99,8 +119,64 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, hr + Statistics: Num rows: 666 Data size: 303696 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: '2008-12-31' (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1158 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: '2008-12-31' (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: '2008-12-31' (type: string), _col1 (type: string) + Statistics: Num rows: 1 Data size: 1158 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: 
KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: '2008-12-31' (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1158 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), '2008-12-31' (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1158 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1158 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -121,6 +197,10 @@ STAGE PLANS: Stage: Stage-4 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part1 Stage: Stage-1 Move Operator @@ -138,6 +218,10 @@ STAGE PLANS: Stage: Stage-5 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part2 PREHOOK: query: from srcpart insert overwrite table nzhang_part1 partition (ds, hr) select key, value, ds, hr where ds <= '2008-04-08' diff --git ql/src/test/results/clientpositive/llap/load_dyn_part2.q.out ql/src/test/results/clientpositive/llap/load_dyn_part2.q.out index 52b73536dd..e8f1aaf629 100644 --- ql/src/test/results/clientpositive/llap/load_dyn_part2.q.out +++ ql/src/test/results/clientpositive/llap/load_dyn_part2.q.out @@ -45,6 +45,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -78,6 +79,42 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part_bucket + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2010-03-23' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 2000 Data size: 912000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 2316 Basic stats: 
COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -98,6 +135,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part_bucket PREHOOK: query: insert overwrite table nzhang_part_bucket partition (ds='2010-03-23', hr) select key, value, hr from srcpart where ds is not null and hr is not null PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/llap/load_dyn_part3.q.out ql/src/test/results/clientpositive/llap/load_dyn_part3.q.out index 5230b597c0..c9823ce853 100644 --- ql/src/test/results/clientpositive/llap/load_dyn_part3.q.out +++ ql/src/test/results/clientpositive/llap/load_dyn_part3.q.out @@ -49,6 +49,9 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -67,8 +70,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 2000 Data size: 1092000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 4992 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 4 Data size: 4992 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: 
mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 4992 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 4992 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 4992 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -89,6 +128,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part3 PREHOOK: query: insert overwrite table nzhang_part3 partition (ds, hr) select key, value, ds, hr from srcpart where ds is not null and hr is not null PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/llap/load_dyn_part5.q.out ql/src/test/results/clientpositive/llap/load_dyn_part5.q.out index 8b98b8ed9c..7223808a14 100644 --- ql/src/test/results/clientpositive/llap/load_dyn_part5.q.out +++ ql/src/test/results/clientpositive/llap/load_dyn_part5.q.out @@ -36,6 +36,9 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -54,8 +57,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part5 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll') + keys: value (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 132750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 132750 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 132750 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 132750 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 132750 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -75,6 +114,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key + 
Column Types: string + Table: default.nzhang_part5 PREHOOK: query: insert overwrite table nzhang_part5 partition (value) select key, value from src PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/llap/mapjoin3.q.out ql/src/test/results/clientpositive/llap/mapjoin3.q.out index ccd03889a9..b8e9ec1c06 100644 --- ql/src/test/results/clientpositive/llap/mapjoin3.q.out +++ ql/src/test/results/clientpositive/llap/mapjoin3.q.out @@ -105,11 +105,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: member (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join 0 to 1 @@ -119,14 +119,14 @@ STAGE PLANS: outputColumnNames: _col0, _col2, _col3 input vertices: 1 Map 2 - Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: bigint), _col2 (type: varchar(100)), _col3 (type: varchar(100)) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -137,16 +137,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test_1 - Statistics: Num rows: 3 Data size: 576 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 282 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: member (type: bigint), age (type: varchar(100)), age (type: varchar(100)) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 576 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 3 Data size: 576 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: varchar(100)), _col2 (type: varchar(100)) Execution mode: llap LLAP IO: no inputs diff --git ql/src/test/results/clientpositive/llap/mapjoin46.q.out ql/src/test/results/clientpositive/llap/mapjoin46.q.out index c4ce8548fb..654b041480 100644 --- ql/src/test/results/clientpositive/llap/mapjoin46.q.out +++ ql/src/test/results/clientpositive/llap/mapjoin46.q.out @@ -62,11 +62,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data 
size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join 0 to 1 @@ -76,10 +76,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 input vertices: 1 Map 2 - Statistics: Num rows: 6 Data size: 1267 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1524 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 1267 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1524 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -90,16 +90,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -162,11 +162,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join 0 to 1 @@ -179,10 +179,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 input vertices: 1 Map 2 - Statistics: Num rows: 6 Data size: 1267 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 857 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 1267 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 857 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -193,19 +193,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key BETWEEN 100 AND 102 (type: boolean) - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: 
string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -269,11 +269,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join 0 to 1 @@ -286,10 +286,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 input vertices: 1 Map 2 - Statistics: Num rows: 6 Data size: 2310 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1142 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 2310 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1142 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -300,17 +300,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key BETWEEN 100 AND 102 (type: boolean) - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -370,16 +370,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce 
partition columns: _col1 (type: int) - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -387,11 +387,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Right Outer Join 0 to 1 @@ -401,10 +401,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 input vertices: 0 Map 1 - Statistics: Num rows: 6 Data size: 1267 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1524 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 1267 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1524 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -465,11 +465,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join 0 to 1 @@ -482,10 +482,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 input vertices: 1 Map 2 - Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -496,14 +496,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: 
int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -572,11 +572,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join 0 to 1 @@ -587,10 +587,10 @@ STAGE PLANS: input vertices: 1 Map 2 residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -601,14 +601,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -680,11 +680,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join 0 to 1 @@ -695,10 +695,10 @@ STAGE PLANS: input vertices: 1 Map 2 residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102)} - Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -709,14 +709,14 @@ STAGE PLANS: Map Operator Tree: TableScan 
alias: test2
- Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: int), col_2 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
sort order:
- Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string)
Execution mode: llap
LLAP IO: no inputs
@@ -786,11 +786,11 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: test1
- Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: int), col_1 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
Map Join Operator
condition map:
Left Outer Join 0 to 1
@@ -801,10 +801,10 @@ STAGE PLANS:
input vertices:
1 Map 2
residual filter predicates: {((_col1 = _col4) or _col3 BETWEEN 100 AND 102)}
- Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -815,14 +815,14 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: test2
- Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: int), col_2 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
sort order:
- Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string)
Execution mode: llap
LLAP IO: no inputs
@@ -888,11 +888,11 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: test1
- Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: int), col_1 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
Map Join Operator
condition map:
Left Outer Join 0 to 1
@@ -903,10 +903,10 @@ STAGE PLANS:
input vertices:
1 Map 2
residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)}
- Statistics: Num rows: 6 Data size: 1267 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 1524 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 6 Data size: 1267 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 1524 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -917,16 +917,16 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: test2
- Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: int), col_2 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col1 (type: int)
sort order: +
Map-reduce partition columns: _col1 (type: int)
- Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: int), _col2 (type: string)
Execution mode: llap
LLAP IO: no inputs
@@ -993,14 +993,14 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: test1
- Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: int), col_1 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
sort order:
- Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string)
Execution mode: llap
LLAP IO: no inputs
@@ -1008,11 +1008,11 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: test2
- Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: int), col_2 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
Map Join Operator
condition map:
Right Outer Join 0 to 1
@@ -1023,10 +1023,10 @@ STAGE PLANS:
input vertices:
0 Map 1
residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)}
- Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1101,14 +1101,14 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: test1
- Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: int), col_1 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
sort order:
- Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string)
Execution mode: llap
LLAP IO: no inputs
@@ -1116,11 +1116,11 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: test2
- Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: int), col_2 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
Map Join Operator
condition map:
Right Outer Join 0 to 1
@@ -1131,10 +1131,10 @@ STAGE PLANS:
input vertices:
0 Map 1
residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102)}
- Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1204,14 +1204,14 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: test1
- Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: int), col_1 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
sort order:
- Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string)
Execution mode: llap
LLAP IO: no inputs
@@ -1219,11 +1219,11 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: test2
- Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: int), col_2 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
Map Join Operator
condition map:
Right Outer Join 0 to 1
@@ -1234,10 +1234,10 @@ STAGE PLANS:
input vertices:
0 Map 1
residual filter predicates: {((_col1 = _col4) or _col3 BETWEEN 100 AND 102)}
- Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1308,16 +1308,16 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: test1
- Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: int), col_1 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col1 (type: int)
sort order: +
Map-reduce partition columns: _col1 (type: int)
- Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: int), _col2 (type: string)
Execution mode: llap
LLAP IO: no inputs
@@ -1325,11 +1325,11 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: test2
- Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: int), col_2 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
Map Join Operator
condition map:
Right Outer Join 0 to 1
@@ -1340,10 +1340,10 @@ STAGE PLANS:
input vertices:
0 Map 1
residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)}
- Statistics: Num rows: 6 Data size: 1267 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 1524 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 6 Data size: 1267 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 1524 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1411,14 +1411,14 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: test1
- Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
sort order:
- Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string)
Execution mode: llap
LLAP IO: no inputs
@@ -1426,14 +1426,14 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: test2
- Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: int), col_2 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
sort order:
- Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string)
Execution mode: llap
LLAP IO: no inputs
@@ -1448,10 +1448,10 @@ STAGE PLANS:
1
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)}
- Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1524,14 +1524,14 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: test1
- Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: int), col_1 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
sort order:
- Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string)
Execution mode: llap
LLAP IO: no inputs
@@ -1539,14 +1539,14 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: test2
- Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: int), col_2 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
sort order:
- Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1561,10 +1561,10 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102)} - Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1635,14 +1635,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1650,14 +1650,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1672,10 +1672,10 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {((_col1 = _col4) or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1744,16 +1744,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: 
int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1761,16 +1761,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1785,10 +1785,10 @@ STAGE PLANS: 1 _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 6 Data size: 1267 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1711 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 1267 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1711 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1886,21 +1886,21 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join 0 to 1 @@ -1911,10 +1911,10 @@ STAGE PLANS: 
input vertices: 1 Map 4 residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 6 Data size: 1267 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1524 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 1267 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1524 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1922,11 +1922,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Right Outer Join 0 to 1 @@ -1937,10 +1937,10 @@ STAGE PLANS: input vertices: 0 Map 1 residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 6 Data size: 1267 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1524 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 1267 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1524 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1948,16 +1948,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1972,10 +1972,10 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 residual filter predicates: {(_col1 is null or (_col10 is null and (_col7 <> _col4)))} - Statistics: Num rows: 36 Data size: 15240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 64 Data size: 24440 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 36 Data size: 15240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 64 Data size: 24440 Basic stats: COMPLETE Column stats: COMPLETE table: input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/llap/mapjoin_decimal.q.out ql/src/test/results/clientpositive/llap/mapjoin_decimal.q.out
index 30239fa2ec..0ffe74e203 100644
--- ql/src/test/results/clientpositive/llap/mapjoin_decimal.q.out
+++ ql/src/test/results/clientpositive/llap/mapjoin_decimal.q.out
@@ -95,14 +95,14 @@ STAGE PLANS:
         Map Operator Tree:
             TableScan
               alias: t1
-             Statistics: Num rows: 1049 Data size: 111776 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE
              Filter Operator
                predicate: dec is not null (type: boolean)
-              Statistics: Num rows: 997 Data size: 106235 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
                  expressions: dec (type: decimal(4,2))
                  outputColumnNames: _col0
-                Statistics: Num rows: 997 Data size: 106235 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE
                  Map Join Operator
                    condition map:
                         Inner Join 0 to 1
@@ -112,11 +112,11 @@ STAGE PLANS:
                    outputColumnNames: _col0, _col1
                    input vertices:
                      1 Map 3
-                  Statistics: Num rows: 1096 Data size: 116858 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1101 Data size: 246624 Basic stats: COMPLETE Column stats: COMPLETE
                    Reduce Output Operator
                      key expressions: _col0 (type: decimal(4,2))
                      sort order: +
-                    Statistics: Num rows: 1096 Data size: 116858 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1101 Data size: 246624 Basic stats: COMPLETE Column stats: COMPLETE
                      value expressions: _col1 (type: decimal(4,0))
            Execution mode: llap
            LLAP IO: all inputs
@@ -124,19 +124,19 @@ STAGE PLANS:
         Map Operator Tree:
             TableScan
               alias: t2
-             Statistics: Num rows: 1049 Data size: 111776 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE
              Filter Operator
                predicate: dec is not null (type: boolean)
-              Statistics: Num rows: 997 Data size: 106235 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
                  expressions: dec (type: decimal(4,0))
                  outputColumnNames: _col0
-                Statistics: Num rows: 997 Data size: 106235 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    key expressions: _col0 (type: decimal(6,2))
                    sort order: +
                    Map-reduce partition columns: _col0 (type: decimal(6,2))
-                  Statistics: Num rows: 997 Data size: 106235 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: all inputs
         Reducer 2
@@ -145,10 +145,10 @@ STAGE PLANS:
             Select Operator
               expressions: KEY.reducesinkkey0 (type: decimal(4,2)), VALUE._col0 (type: decimal(4,0))
               outputColumnNames: _col0, _col1
-             Statistics: Num rows: 1096 Data size: 116858 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 1101 Data size: 246624 Basic stats: COMPLETE Column stats: COMPLETE
              File Output Operator
                compressed: false
-              Statistics: Num rows: 1096 Data size: 116858 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 1101 Data size: 246624 Basic stats: COMPLETE Column stats: COMPLETE
                table:
                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
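The mapjoin_decimal hunks above show the most direct effect of enabling hive.stats.column.autogather: the planner now knows the null count of dec, so the "dec is not null" filter no longer applies the default discount (997 of 1049 rows) and the estimate stays at 1049 with Column stats: COMPLETE. A minimal sketch of how to observe the gathered stats, assuming a hypothetical scratch table dec_demo rather than the test's t1:

    SET hive.stats.column.autogather=true;          -- the new default from this patch
    CREATE TABLE dec_demo (`dec` decimal(4,2));     -- `dec` quoted: DEC is a keyword
    INSERT INTO dec_demo VALUES (1.00), (2.50);
    -- num_nulls and distinct_count below were gathered by the INSERT itself
    DESCRIBE FORMATTED dec_demo `dec`;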
diff --git ql/src/test/results/clientpositive/llap/mapjoin_emit_interval.q.out ql/src/test/results/clientpositive/llap/mapjoin_emit_interval.q.out
index 5468448dd8..0cda68d897 100644
--- ql/src/test/results/clientpositive/llap/mapjoin_emit_interval.q.out
+++ ql/src/test/results/clientpositive/llap/mapjoin_emit_interval.q.out
@@ -62,11 +62,11 @@ STAGE PLANS:
         Map Operator Tree:
             TableScan
               alias: test1
-             Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
              Select Operator
                expressions: key (type: int), value (type: int), col_1 (type: string)
                outputColumnNames: _col0, _col1, _col2
-              Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
                Map Join Operator
                  condition map:
                       Left Outer Join 0 to 1
@@ -79,10 +79,10 @@ STAGE PLANS:
                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
                  input vertices:
                    1 Map 2
-                Statistics: Num rows: 6 Data size: 1267 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 8 Data size: 1524 Basic stats: COMPLETE Column stats: COMPLETE
                  File Output Operator
                    compressed: false
-                  Statistics: Num rows: 6 Data size: 1267 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 8 Data size: 1524 Basic stats: COMPLETE Column stats: COMPLETE
                    table:
                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -93,16 +93,16 @@ STAGE PLANS:
         Map Operator Tree:
             TableScan
               alias: test2
-             Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
              Select Operator
                expressions: key (type: int), value (type: int), col_2 (type: string)
                outputColumnNames: _col0, _col1, _col2
-              Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
                Reduce Output Operator
                  key expressions: _col1 (type: int)
                  sort order: +
                  Map-reduce partition columns: _col1 (type: int)
-                Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
                  value expressions: _col0 (type: int), _col2 (type: string)
            Execution mode: llap
            LLAP IO: no inputs
@@ -161,11 +161,11 @@ STAGE PLANS:
         Map Operator Tree:
             TableScan
               alias: test1
-             Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
              Select Operator
                expressions: key (type: int), value (type: int), col_1 (type: string)
                outputColumnNames: _col0, _col1, _col2
-              Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
                Map Join Operator
                  condition map:
                       Left Outer Join 0 to 1
@@ -178,10 +178,10 @@ STAGE PLANS:
                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
                  input vertices:
                    1 Map 2
-                Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE
                  File Output Operator
                    compressed: false
-                  Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE
                    table:
                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -192,14 +192,14 @@ STAGE PLANS:
         Map Operator Tree:
             TableScan
               alias: test2
-             Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
              Select Operator
                expressions: key (type: int), value (type: int), col_2 (type: string)
                outputColumnNames: _col0, _col1, _col2
-              Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
                Reduce Output Operator
                  sort order:
-                Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
                  value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string)
            Execution mode: llap
            LLAP IO: no inputs
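In mapjoin_emit_interval.q.out, as in the other outer-join plans in this patch, only the estimates move, not the plan shape: row counts change because join cardinality is now derived from per-column statistics, and data sizes shrink because Column stats: COMPLETE sizes rows from per-column average widths rather than from the raw scan size. A session that wants the pre-patch estimates, or an INSERT without the extra stats stage, can opt back out; a sketch, assuming a dest1 whose schema matches the two columns selected from src, as in these tests:

    SET hive.stats.column.autogather=false;   -- session-level opt-out
    EXPLAIN INSERT OVERWRITE TABLE dest1 SELECT key, value FROM src;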
diff --git ql/src/test/results/clientpositive/llap/mapreduce1.q.out ql/src/test/results/clientpositive/llap/mapreduce1.q.out
index 7908e586e9..f777993bae 100644
--- ql/src/test/results/clientpositive/llap/mapreduce1.q.out
+++ ql/src/test/results/clientpositive/llap/mapreduce1.q.out
@@ -34,6 +34,7 @@ STAGE PLANS:
#### A masked pattern was here ####
       Edges:
         Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
       Vertices:
         Map 1
@@ -73,6 +74,34 @@ STAGE PLANS:
                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                        name: default.dest1
+                 Select Operator
+                   expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string)
+                   outputColumnNames: key, ten, one, value
+                   Statistics: Num rows: 500 Data size: 51500 Basic stats: COMPLETE Column stats: COMPLETE
+                   Group By Operator
+                     aggregations: compute_stats(key, 'hll'), compute_stats(ten, 'hll'), compute_stats(one, 'hll'), compute_stats(value, 'hll')
+                     mode: hash
+                     outputColumnNames: _col0, _col1, _col2, _col3
+                     Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: COMPLETE
+                     Reduce Output Operator
+                       sort order:
+                       Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: COMPLETE
+                       value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+        Reducer 3
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

  Stage: Stage-2
    Dependency Collection
@@ -90,6 +119,10 @@ STAGE PLANS:
  Stage: Stage-3
    Stats Work
      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, ten, one, value
+          Column Types: int, int, int, string
+          Table: default.dest1

PREHOOK: query: FROM src
INSERT OVERWRITE TABLE dest1
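The block appended to mapreduce1.q.out is the generic autogather tail this patch adds to INSERT plans: a Select over the rows being written, a map-side Group By computing one compute_stats(col, 'hll') sketch per column, a single reducer (the new CUSTOM_SIMPLE_EDGE to Reducer 3) merging the sketches in mergepartial mode, and a Column Stats Desc telling the Stats Work stage which columns to persist. It automates what previously had to be run by hand after a load; roughly, assuming dest1 as populated above:

    -- manual equivalent of the appended compute_stats stage
    ANALYZE TABLE dest1 COMPUTE STATISTICS FOR COLUMNS;
    -- 'hll' is the HyperLogLog NDV sketch; the choice of sketch is assumed
    -- here to follow hive.stats.ndv.algo with its hll default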
diff --git ql/src/test/results/clientpositive/llap/mapreduce2.q.out ql/src/test/results/clientpositive/llap/mapreduce2.q.out
index d468e1eb36..a3334685c7 100644
--- ql/src/test/results/clientpositive/llap/mapreduce2.q.out
+++ ql/src/test/results/clientpositive/llap/mapreduce2.q.out
@@ -32,6 +32,7 @@ STAGE PLANS:
#### A masked pattern was here ####
       Edges:
         Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
       Vertices:
         Map 1
@@ -70,6 +71,34 @@ STAGE PLANS:
                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                        name: default.dest1
+                 Select Operator
+                   expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string)
+                   outputColumnNames: key, ten, one, value
+                   Statistics: Num rows: 500 Data size: 51500 Basic stats: COMPLETE Column stats: COMPLETE
+                   Group By Operator
+                     aggregations: compute_stats(key, 'hll'), compute_stats(ten, 'hll'), compute_stats(one, 'hll'), compute_stats(value, 'hll')
+                     mode: hash
+                     outputColumnNames: _col0, _col1, _col2, _col3
+                     Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: COMPLETE
+                     Reduce Output Operator
+                       sort order:
+                       Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: COMPLETE
+                       value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+        Reducer 3
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

  Stage: Stage-2
    Dependency Collection
@@ -87,6 +116,10 @@ STAGE PLANS:
  Stage: Stage-3
    Stats Work
      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, ten, one, value
+          Column Types: int, int, int, string
+          Table: default.dest1

PREHOOK: query: FROM src
INSERT OVERWRITE TABLE dest1
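mapreduce2.q.out gets the identical tail, differing only in the pre-existing edge type of Reducer 2. Once Stage-3 (Stats Work) runs, the merged column statistics are persisted to the metastore and are visible per column; a quick verification sketch, again assuming the dest1 of these tests:

    DESCRIBE FORMATTED dest1 key;
    -- the column-stats fields (min, max, num_nulls, distinct_count) should
    -- now be populated by the INSERT itself, with no explicit ANALYZE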
diff --git ql/src/test/results/clientpositive/llap/merge1.q.out ql/src/test/results/clientpositive/llap/merge1.q.out
index e77a5dfe21..5499af7eb8 100644
--- ql/src/test/results/clientpositive/llap/merge1.q.out
+++ ql/src/test/results/clientpositive/llap/merge1.q.out
@@ -26,6 +26,7 @@ STAGE PLANS:
#### A masked pattern was here ####
       Edges:
         Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
       Vertices:
         Map 1
@@ -72,6 +73,34 @@ STAGE PLANS:
                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                        name: default.dest1
+                 Select Operator
+                   expressions: _col0 (type: int), _col1 (type: int)
+                   outputColumnNames: key, val
+                   Statistics: Num rows: 250 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
+                   Group By Operator
+                     aggregations: compute_stats(key, 'hll'), compute_stats(val, 'hll')
+                     mode: hash
+                     outputColumnNames: _col0, _col1
+                     Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE
+                     Reduce Output Operator
+                       sort order:
+                       Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE
+                       value expressions: _col0 (type: struct), _col1 (type: struct)
+        Reducer 3
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

  Stage: Stage-2
    Dependency Collection
@@ -89,6 +118,10 @@ STAGE PLANS:
  Stage: Stage-3
    Stats Work
      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, val
+          Column Types: int, int
+          Table: default.dest1

PREHOOK: query: insert overwrite table dest1
select key, count(1) from src group by key
@@ -479,26 +512,57 @@ STAGE PLANS:
  Stage: Stage-1
    Tez
#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
       Vertices:
         Map 1
            Map Operator Tree:
                TableScan
                  alias: test_src
-                 Statistics: Num rows: 1000 Data size: 185608 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: key (type: string)
                    outputColumnNames: _col0
-                   Statistics: Num rows: 1000 Data size: 185608 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE
                    File Output Operator
                      compressed: false
-                     Statistics: Num rows: 1000 Data size: 185608 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE
                      table:
                          input format: org.apache.hadoop.mapred.TextInputFormat
                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                          name: default.dest1
+                   Select Operator
+                     expressions: _col0 (type: string)
+                     outputColumnNames: key
+                     Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE
+                     Group By Operator
+                       aggregations: compute_stats(key, 'hll')
+                       mode: hash
+                       outputColumnNames: _col0
+                       Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE
+                       Reduce Output Operator
+                         sort order:
+                         Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE
+                         value expressions: _col0 (type: struct)
            Execution mode: llap
            LLAP IO: no inputs
+        Reducer 2
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

  Stage: Stage-2
    Dependency Collection
@@ -516,6 +580,10 @@ STAGE PLANS:
  Stage: Stage-3
    Stats Work
      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key
+          Column Types: string
+          Table: default.dest1

PREHOOK: query: 
insert overwrite table dest1 select key from test_src PREHOOK: type: QUERY @@ -546,26 +614,57 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: test_src - Statistics: Num rows: 1000 Data size: 185608 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 185608 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1000 Data size: 185608 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: key + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll') + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -583,6 +682,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key + Column Types: string + Table: default.dest1 PREHOOK: query: insert overwrite table dest1 select key from test_src PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/llap/merge2.q.out ql/src/test/results/clientpositive/llap/merge2.q.out index 25b194697a..b1e6d7bc40 100644 --- ql/src/test/results/clientpositive/llap/merge2.q.out +++ ql/src/test/results/clientpositive/llap/merge2.q.out @@ -26,6 +26,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -72,6 +73,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, val + Statistics: Num rows: 250 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(val, 
'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -89,6 +118,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, val + Column Types: int, int + Table: default.test1 PREHOOK: query: insert overwrite table test1 select key, count(1) from src group by key @@ -479,26 +512,57 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: test_src - Statistics: Num rows: 1000 Data size: 185608 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 185608 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1000 Data size: 185608 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: key + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll') + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ 
-516,6 +580,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key + Column Types: string + Table: default.test1 PREHOOK: query: insert overwrite table test1 select key from test_src PREHOOK: type: QUERY @@ -546,26 +614,57 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: test_src - Statistics: Num rows: 1000 Data size: 185608 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 185608 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1000 Data size: 185608 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: key + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll') + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -583,6 +682,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key + Column Types: string + Table: default.test1 PREHOOK: query: insert overwrite table test1 select key from test_src PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/llap/mergejoin.q.out ql/src/test/results/clientpositive/llap/mergejoin.q.out index cec7c7e5ac..ff0b1f1b4b 100644 --- ql/src/test/results/clientpositive/llap/mergejoin.q.out +++ ql/src/test/results/clientpositive/llap/mergejoin.q.out @@ -410,7 +410,7 @@ STAGE PLANS: TableScan alias: a filterExpr: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 23672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ds:string, 3:ROW__ID:struct] @@ -420,7 +420,7 @@ STAGE PLANS: native: true predicateExpression: 
SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) - Statistics: Num rows: 230 Data size: 22498 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 @@ -428,7 +428,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 230 Data size: 22498 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -439,7 +439,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [] - Statistics: Num rows: 230 Data size: 22498 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -463,7 +463,7 @@ STAGE PLANS: TableScan alias: b filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 48904 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ds:string, 3:ROW__ID:struct] @@ -473,7 +473,7 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) - Statistics: Num rows: 475 Data size: 46458 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 @@ -481,7 +481,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 475 Data size: 46458 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -492,7 +492,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [] - Statistics: Num rows: 475 Data size: 46458 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -520,15 +520,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 522 Data size: 51103 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 391 Data size: 3128 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: 
NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap @@ -557,13 +557,13 @@ STAGE PLANS: projectedOutputColumnNums: [0] mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1592,7 +1592,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 242 Data size: 23672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ds:string, 3:ROW__ID:struct] @@ -1603,7 +1603,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 242 Data size: 23672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -1614,7 +1614,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [] - Statistics: Num rows: 242 Data size: 23672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -1637,7 +1637,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 500 Data size: 48904 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ds:string, 3:ROW__ID:struct] @@ -1648,7 +1648,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 500 Data size: 48904 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -1659,7 +1659,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [] - Statistics: Num rows: 500 Data size: 48904 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -1687,15 +1687,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - 
Statistics: Num rows: 550 Data size: 53794 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 391 Data size: 3128 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap @@ -1724,13 +1724,13 @@ STAGE PLANS: projectedOutputColumnNums: [0] mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1788,7 +1788,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 242 Data size: 23672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ds:string, 3:ROW__ID:struct] @@ -1799,7 +1799,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 242 Data size: 23672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -1810,7 +1810,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [] - Statistics: Num rows: 242 Data size: 23672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -1833,7 +1833,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 500 Data size: 48904 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ds:string, 3:ROW__ID:struct] @@ -1844,7 +1844,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 500 Data size: 48904 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -1855,7 +1855,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [] - Statistics: Num rows: 500 Data size: 48904 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -1883,15 +1883,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 550 Data size: 53794 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap @@ -1920,13 +1920,13 @@ STAGE PLANS: projectedOutputColumnNums: [0] mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1984,7 +1984,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 242 Data size: 23672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ds:string, 3:ROW__ID:struct] @@ -1995,7 +1995,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 242 Data size: 23672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -2006,7 +2006,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [] - Statistics: Num rows: 242 Data size: 23672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -2029,7 +2029,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 500 Data size: 48904 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 
2:ds:string, 3:ROW__ID:struct] @@ -2040,7 +2040,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 500 Data size: 48904 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -2051,7 +2051,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [] - Statistics: Num rows: 500 Data size: 48904 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -2079,15 +2079,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 550 Data size: 53794 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 742 Data size: 5936 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap @@ -2116,13 +2116,13 @@ STAGE PLANS: projectedOutputColumnNums: [0] mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2170,11 +2170,13 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Reducer 6 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Map 7 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Map 1 <- Reducer 7 (BROADCAST_EDGE) + Map 8 <- Reducer 5 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) + Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2182,7 +2184,7 @@ STAGE PLANS: TableScan alias: a filterExpr: (key is not null and value is not null and (value BETWEEN DynamicValue(RS_10_c_value_min) AND DynamicValue(RS_10_c_value_max) and in_bloom_filter(value, DynamicValue(RS_10_c_value_bloom_filter)))) (type: boolean) - Statistics: Num rows: 242 Data size: 43428 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE TableScan 
Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ds:string, 3:ROW__ID:struct] @@ -2192,7 +2194,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 1:string), FilterExprAndExpr(children: FilterStringColumnBetweenDynamicValue(col 1:string, left NULL, right NULL), VectorInBloomFilterColDynamicValue)) predicate: ((value BETWEEN DynamicValue(RS_10_c_value_min) AND DynamicValue(RS_10_c_value_max) and in_bloom_filter(value, DynamicValue(RS_10_c_value_bloom_filter))) and key is not null and value is not null) (type: boolean) - Statistics: Num rows: 218 Data size: 39121 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 @@ -2200,7 +2202,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 218 Data size: 39121 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string) sort order: + @@ -2211,7 +2213,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [0] - Statistics: Num rows: 218 Data size: 39121 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs @@ -2231,7 +2233,7 @@ STAGE PLANS: partitionColumnCount: 1 partitionColumns: ds:string scratchColumnTypeNames: [] - Map 5 + Map 6 Map Operator Tree: TableScan alias: c @@ -2314,12 +2316,12 @@ STAGE PLANS: dataColumns: key:string, value:string partitionColumnCount: 0 scratchColumnTypeNames: [] - Map 7 + Map 8 Map Operator Tree: TableScan alias: b - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 1904 Basic stats: COMPLETE Column stats: NONE + filterExpr: (key is not null and (key BETWEEN DynamicValue(RS_12_a_key_min) AND DynamicValue(RS_12_a_key_max) and in_bloom_filter(key, DynamicValue(RS_12_a_key_bloom_filter)))) (type: boolean) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ds:string, 3:ROW__ID:struct] @@ -2327,9 +2329,9 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) - predicate: key is not null (type: boolean) - Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), FilterExprAndExpr(children: FilterLongColumnBetweenDynamicValue(col 0:int, left 0, right 0), VectorInBloomFilterColDynamicValue)) + predicate: ((key BETWEEN DynamicValue(RS_12_a_key_min) AND DynamicValue(RS_12_a_key_max) and in_bloom_filter(key, DynamicValue(RS_12_a_key_bloom_filter))) and key is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE 
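The hunks above show Tez dynamic semijoin reduction taking effect: the scan of b now carries min/max BETWEEN bounds and an in_bloom_filter predicate built from the join side (the DynamicValue(RS_12_a_key_*) references), and with complete column statistics the planner costs the filtered scan directly instead of applying a generic not-null discount. A minimal HiveQL sketch that produces this plan shape; the table names and data below are illustrative, not the tables this test uses:

    -- Illustrative schema: a small dimension joined to a partitioned fact on an int key.
    CREATE TABLE dim (key int, value string);
    CREATE TABLE fact (key int, value string) PARTITIONED BY (ds string);
    INSERT INTO dim VALUES (0, 'val_0'), (98, 'val_98');
    INSERT INTO fact PARTITION (ds='2008-04-08') VALUES (0, 'val_0'), (100, 'val_100');
    -- Gather column statistics so EXPLAIN reports "Column stats: COMPLETE".
    ANALYZE TABLE dim COMPUTE STATISTICS FOR COLUMNS;
    ANALYZE TABLE fact PARTITION (ds) COMPUTE STATISTICS FOR COLUMNS;
    -- With semijoin reduction on, the fact scan picks up min/max BETWEEN and
    -- in_bloom_filter predicates like the ones in the hunks above.
    SET hive.tez.dynamic.semijoin.reduction=true;
    EXPLAIN SELECT count(*) FROM fact b JOIN dim a ON a.key = b.key;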
Select Operator expressions: key (type: int) outputColumnNames: _col0 @@ -2337,7 +2339,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -2348,7 +2350,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [] - Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -2377,12 +2379,25 @@ STAGE PLANS: 0 _col1 (type: string) 1 _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 239 Data size: 43033 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 39 Data size: 156 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 239 Data size: 43033 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 39 Data size: 156 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 39 Data size: 156 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=25) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -2392,15 +2407,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 522 Data size: 1988 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 63 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 4 Execution mode: vectorized, llap @@ -2429,18 +2444,56 @@ STAGE PLANS: projectedOutputColumnNums: [0] mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: 
COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 + Reducer 5 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: VALUE._col0:int, VALUE._col1:int, VALUE._col2:binary + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=25) + Group By Vectorization: + aggregators: VectorUDAFMinLong(col 0:int) -> int, VectorUDAFMaxLong(col 1:int) -> int, VectorUDAFBloomFilterMerge(col 2:binary) -> binary + className: VectorGroupByOperator + groupByMode: FINAL + native: false + vectorProcessingMode: STREAMING + projectedOutputColumnNums: [0, 1, 2] + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumnNums: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0, 1, 2] + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) + Reducer 7 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -2530,7 +2583,7 @@ STAGE PLANS: TableScan alias: a filterExpr: value is not null (type: boolean) - Statistics: Num rows: 242 Data size: 65252 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22022 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ds:string, 3:ROW__ID:struct] @@ -2540,7 +2593,7 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 1:string) predicate: value is not null (type: boolean) - Statistics: Num rows: 230 Data size: 62016 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22022 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: value (type: string) outputColumnNames: _col1 @@ -2548,7 +2601,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [1] - Statistics: Num rows: 230 Data size: 62016 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22022 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string) sort order: + @@ -2559,7 +2612,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [] - Statistics: Num rows: 230 Data size: 62016 Basic stats: COMPLETE Column stats: 
NONE + Statistics: Num rows: 242 Data size: 22022 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -2583,7 +2636,7 @@ STAGE PLANS: TableScan alias: b filterExpr: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 134584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ds:string, 3:ROW__ID:struct] @@ -2593,7 +2646,7 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 1:string) predicate: value is not null (type: boolean) - Statistics: Num rows: 475 Data size: 127854 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: value (type: string) outputColumnNames: _col1 @@ -2601,7 +2654,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [1] - Statistics: Num rows: 475 Data size: 127854 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string) sort order: + @@ -2612,7 +2665,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [] - Statistics: Num rows: 475 Data size: 127854 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -2640,15 +2693,15 @@ STAGE PLANS: keys: 0 _col1 (type: string) 1 _col1 (type: string) - Statistics: Num rows: 522 Data size: 140639 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 391 Data size: 3128 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap @@ -2677,13 +2730,13 @@ STAGE PLANS: projectedOutputColumnNums: [0] mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2746,7 +2799,7 @@ STAGE PLANS: TableScan alias: s1 filterExpr: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 924 Basic stats: COMPLETE 
Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ds:string, 3:ROW__ID:struct] @@ -2756,7 +2809,7 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) - Statistics: Num rows: 230 Data size: 878 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 @@ -2764,7 +2817,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 230 Data size: 878 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -2775,7 +2828,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [] - Statistics: Num rows: 230 Data size: 878 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -2799,7 +2852,7 @@ STAGE PLANS: TableScan alias: s3 filterExpr: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 924 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ds:string, 3:ROW__ID:struct] @@ -2809,7 +2862,7 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) - Statistics: Num rows: 230 Data size: 878 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 @@ -2817,7 +2870,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 230 Data size: 878 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -2828,7 +2881,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [] - Statistics: Num rows: 230 Data size: 878 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -2852,7 +2905,7 @@ STAGE PLANS: TableScan alias: s2 filterExpr: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 924 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true 
vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ds:string, 3:ROW__ID:struct] @@ -2862,7 +2915,7 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) - Statistics: Num rows: 230 Data size: 878 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 @@ -2870,7 +2923,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 230 Data size: 878 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -2881,7 +2934,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [] - Statistics: Num rows: 483 Data size: 1843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 624 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -2905,7 +2958,7 @@ STAGE PLANS: TableScan alias: b filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 1904 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ds:string, 3:ROW__ID:struct] @@ -2915,7 +2968,7 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) - Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 @@ -2923,7 +2976,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -2934,7 +2987,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [] - Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -2963,12 +3016,12 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 253 Data size: 965 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 382 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 483 Data size: 1843 Basic 
stats: COMPLETE Column stats: NONE + Statistics: Num rows: 624 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -2978,15 +3031,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 531 Data size: 2027 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1009 Data size: 8072 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 5 Execution mode: vectorized, llap @@ -3015,13 +3068,13 @@ STAGE PLANS: projectedOutputColumnNums: [0] mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3063,7 +3116,7 @@ STAGE PLANS: TableScan alias: a filterExpr: value is not null (type: boolean) - Statistics: Num rows: 242 Data size: 65252 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22022 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ds:string, 3:ROW__ID:struct] @@ -3073,7 +3126,7 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 1:string) predicate: value is not null (type: boolean) - Statistics: Num rows: 230 Data size: 62016 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22022 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: value (type: string) outputColumnNames: _col1 @@ -3081,7 +3134,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [1] - Statistics: Num rows: 230 Data size: 62016 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22022 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string) sort order: + @@ -3092,7 +3145,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [] - Statistics: Num rows: 230 Data size: 62016 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22022 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -3116,7 +3169,7 @@ STAGE PLANS: TableScan alias: b filterExpr: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 134584 Basic stats: COMPLETE 
Column stats: NONE + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ds:string, 3:ROW__ID:struct] @@ -3126,7 +3179,7 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 1:string) predicate: value is not null (type: boolean) - Statistics: Num rows: 475 Data size: 127854 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: value (type: string) outputColumnNames: _col1 @@ -3134,7 +3187,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [1] - Statistics: Num rows: 475 Data size: 127854 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string) sort order: + @@ -3145,7 +3198,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [] - Statistics: Num rows: 475 Data size: 127854 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -3173,15 +3226,15 @@ STAGE PLANS: keys: 0 _col1 (type: string) 1 _col1 (type: string) - Statistics: Num rows: 522 Data size: 140639 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 391 Data size: 3128 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap @@ -3210,13 +3263,13 @@ STAGE PLANS: projectedOutputColumnNums: [0] mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3262,11 +3315,13 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Reducer 6 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Map 7 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Map 1 <- Reducer 7 (BROADCAST_EDGE) + Map 8 <- Reducer 5 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) + Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Map 5 
(CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -3274,7 +3329,7 @@ STAGE PLANS: TableScan alias: a filterExpr: (key is not null and value is not null and (value BETWEEN DynamicValue(RS_10_c_value_min) AND DynamicValue(RS_10_c_value_max) and in_bloom_filter(value, DynamicValue(RS_10_c_value_bloom_filter)))) (type: boolean) - Statistics: Num rows: 242 Data size: 43428 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ds:string, 3:ROW__ID:struct] @@ -3284,7 +3339,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 1:string), FilterExprAndExpr(children: FilterStringColumnBetweenDynamicValue(col 1:string, left NULL, right NULL), VectorInBloomFilterColDynamicValue)) predicate: ((value BETWEEN DynamicValue(RS_10_c_value_min) AND DynamicValue(RS_10_c_value_max) and in_bloom_filter(value, DynamicValue(RS_10_c_value_bloom_filter))) and key is not null and value is not null) (type: boolean) - Statistics: Num rows: 218 Data size: 39121 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 @@ -3292,7 +3347,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 218 Data size: 39121 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string) sort order: + @@ -3303,7 +3358,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [0] - Statistics: Num rows: 218 Data size: 39121 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs @@ -3323,7 +3378,7 @@ STAGE PLANS: partitionColumnCount: 1 partitionColumns: ds:string scratchColumnTypeNames: [] - Map 5 + Map 6 Map Operator Tree: TableScan alias: c @@ -3406,12 +3461,12 @@ STAGE PLANS: dataColumns: key:string, value:string partitionColumnCount: 0 scratchColumnTypeNames: [] - Map 7 + Map 8 Map Operator Tree: TableScan alias: b - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 1904 Basic stats: COMPLETE Column stats: NONE + filterExpr: (key is not null and (key BETWEEN DynamicValue(RS_12_a_key_min) AND DynamicValue(RS_12_a_key_max) and in_bloom_filter(key, DynamicValue(RS_12_a_key_bloom_filter)))) (type: boolean) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ds:string, 3:ROW__ID:struct] @@ -3419,9 +3474,9 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) - 
predicate: key is not null (type: boolean) - Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), FilterExprAndExpr(children: FilterLongColumnBetweenDynamicValue(col 0:int, left 0, right 0), VectorInBloomFilterColDynamicValue)) + predicate: ((key BETWEEN DynamicValue(RS_12_a_key_min) AND DynamicValue(RS_12_a_key_max) and in_bloom_filter(key, DynamicValue(RS_12_a_key_bloom_filter))) and key is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 @@ -3429,7 +3484,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -3440,7 +3495,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [] - Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -3469,12 +3524,25 @@ STAGE PLANS: 0 _col1 (type: string) 1 _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 239 Data size: 43033 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 39 Data size: 156 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 239 Data size: 43033 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 39 Data size: 156 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 39 Data size: 156 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=25) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -3484,15 +3552,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 522 Data size: 1988 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 63 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value 
expressions: _col0 (type: bigint) Reducer 4 Execution mode: vectorized, llap @@ -3521,18 +3589,56 @@ STAGE PLANS: projectedOutputColumnNums: [0] mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 + Reducer 5 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: VALUE._col0:int, VALUE._col1:int, VALUE._col2:binary + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=25) + Group By Vectorization: + aggregators: VectorUDAFMinLong(col 0:int) -> int, VectorUDAFMaxLong(col 1:int) -> int, VectorUDAFBloomFilterMerge(col 2:binary) -> binary + className: VectorGroupByOperator + groupByMode: FINAL + native: false + vectorProcessingMode: STREAMING + projectedOutputColumnNums: [0, 1, 2] + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumnNums: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0, 1, 2] + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) + Reducer 7 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -3630,7 +3736,7 @@ STAGE PLANS: TableScan alias: s1 filterExpr: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 924 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ds:string, 3:ROW__ID:struct] @@ -3640,7 +3746,7 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) - Statistics: Num rows: 230 Data size: 878 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 @@ -3648,7 +3754,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 230 Data size: 878 Basic stats: 
COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -3659,7 +3765,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [] - Statistics: Num rows: 230 Data size: 878 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -3683,7 +3789,7 @@ STAGE PLANS: TableScan alias: s3 filterExpr: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 924 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ds:string, 3:ROW__ID:struct] @@ -3693,7 +3799,7 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) - Statistics: Num rows: 230 Data size: 878 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 @@ -3701,7 +3807,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 230 Data size: 878 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -3712,7 +3818,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [] - Statistics: Num rows: 230 Data size: 878 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -3736,7 +3842,7 @@ STAGE PLANS: TableScan alias: s2 filterExpr: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 924 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ds:string, 3:ROW__ID:struct] @@ -3746,7 +3852,7 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) - Statistics: Num rows: 230 Data size: 878 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 @@ -3754,7 +3860,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 230 Data size: 878 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key 
expressions: _col0 (type: int) sort order: + @@ -3765,7 +3871,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [] - Statistics: Num rows: 483 Data size: 1843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 624 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -3789,7 +3895,7 @@ STAGE PLANS: TableScan alias: b filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 1904 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ds:string, 3:ROW__ID:struct] @@ -3799,7 +3905,7 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) - Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 @@ -3807,7 +3913,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -3818,7 +3924,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [] - Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -3847,12 +3953,12 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 253 Data size: 965 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 382 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 483 Data size: 1843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 624 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -3862,15 +3968,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 531 Data size: 2027 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1009 Data size: 8072 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: 
NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 5 Execution mode: vectorized, llap @@ -3899,13 +4005,13 @@ STAGE PLANS: projectedOutputColumnNums: [0] mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3961,7 +4067,7 @@ STAGE PLANS: TableScan alias: t1 filterExpr: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 66176 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ds:string, 3:ROW__ID:struct] @@ -3971,7 +4077,7 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) - Statistics: Num rows: 230 Data size: 62894 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 @@ -3979,7 +4085,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 230 Data size: 62894 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ @@ -3989,7 +4095,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [] - Statistics: Num rows: 230 Data size: 62894 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -4013,7 +4119,7 @@ STAGE PLANS: TableScan alias: t2 filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 136488 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ds:string, 3:ROW__ID:struct] @@ -4023,7 +4129,7 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) - Statistics: Num rows: 475 Data size: 129663 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 @@ -4031,7 +4137,7 @@ STAGE PLANS: className: VectorSelectOperator native: true 
projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 475 Data size: 129663 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ @@ -4041,7 +4147,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [] - Statistics: Num rows: 475 Data size: 129663 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -4083,7 +4189,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 230 Data size: 62894 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -4094,7 +4200,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [] - Statistics: Num rows: 230 Data size: 62894 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -4104,15 +4210,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 522 Data size: 142629 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 391 Data size: 3128 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 4 Execution mode: vectorized, llap @@ -4141,13 +4247,13 @@ STAGE PLANS: projectedOutputColumnNums: [0] mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4175,7 +4281,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 475 Data size: 129663 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Reduce 
Output Operator key expressions: _col0 (type: int) sort order: + @@ -4186,7 +4292,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [] - Statistics: Num rows: 475 Data size: 129663 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/llap/mm_all.q.out ql/src/test/results/clientpositive/llap/mm_all.q.out index cfbe659634..5cd0061bcc 100644 --- ql/src/test/results/clientpositive/llap/mm_all.q.out +++ ql/src/test/results/clientpositive/llap/mm_all.q.out @@ -63,27 +63,66 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: intermediate - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.part_mm Write Type: INSERT + Select Operator + expressions: _col0 (type: int), UDFToInteger('455') (type: int) + outputColumnNames: key, key_mm + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll') + keys: key_mm (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 1284 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 1284 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct) Execution mode: llap LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 1332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 1332 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 1332 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -105,6 +144,10 @@ STAGE PLANS: Stage: 
Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key + Column Types: int + Table: default.part_mm PREHOOK: query: insert into table part_mm partition(key_mm=455) select key from intermediate PREHOOK: type: QUERY @@ -1779,7 +1822,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} numFiles 3 numRows 6 rawDataSize 13 @@ -1829,7 +1872,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} numFiles 6 numRows 12 rawDataSize 26
diff --git ql/src/test/results/clientpositive/llap/multiMapJoin2.q.out ql/src/test/results/clientpositive/llap/multiMapJoin2.q.out index 88951c3d3a..05ec048848 100644 --- ql/src/test/results/clientpositive/llap/multiMapJoin2.q.out +++ ql/src/test/results/clientpositive/llap/multiMapJoin2.q.out @@ -1683,14 +1683,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: x - Statistics: Num rows: 125 Data size: 23341 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 125 Data size: 10875 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 119 Data size: 22220 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 125 Data size: 10875 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 119 Data size: 22220 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 125 Data size: 10875 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -1699,15 +1699,15 @@ keys: 0 _col0 (type: string) 1 _col0 (type: string) input vertices: 1 Map 3 - Statistics: Num rows: 130 Data size: 24442 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 42 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -1737,10 +1737,10 @@ aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
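The mm_all.q.out hunks above show the two visible effects of gathering column statistics during an INSERT: the plan gains a compute_stats(key, 'hll') aggregation that feeds a Column Stats Desc entry in the Stats Work stage, and the table parameters afterwards carry "COLUMN_STATS":{"key":"true"} inside COLUMN_STATS_ACCURATE; the multiMapJoin2.q.out hunks then show the payoff, with the map-join estimate dropping from 130 to 42 rows once column statistics are COMPLETE. The same metadata state can be produced by hand; a short sketch against a hypothetical table modeled on the test's part_mm:

    -- Hypothetical partitioned table mirroring the shape of the test table.
    CREATE TABLE part_mm_demo (key int) PARTITIONED BY (key_mm int);
    INSERT INTO TABLE part_mm_demo PARTITION (key_mm=455) VALUES (0), (98), (100);
    -- Computes per-column min/max/NDV (HyperLogLog based), the manual
    -- equivalent of the compute_stats aggregation in the INSERT plan above.
    ANALYZE TABLE part_mm_demo PARTITION (key_mm=455) COMPUTE STATISTICS FOR COLUMNS;
    -- The partition parameters now report COLUMN_STATS_ACCURATE with {"key":"true"}.
    DESCRIBE FORMATTED part_mm_demo PARTITION (key_mm=455);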
ql/src/test/results/clientpositive/llap/multi_count_distinct_null.q.out +++ ql/src/test/results/clientpositive/llap/multi_count_distinct_null.q.out @@ -40,21 +40,21 @@ STAGE PLANS: Map Operator Tree: TableScan alias: employee - Statistics: Num rows: 12 Data size: 1224 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: department_id (type: int), gender (type: varchar(10)), education_level (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 12 Data size: 1224 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: int), _col1 (type: varchar(10)), _col2 (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 36 Data size: 3672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 1556 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: varchar(10)), _col2 (type: int), _col3 (type: int) sort order: ++++ Map-reduce partition columns: _col0 (type: int), _col1 (type: varchar(10)), _col2 (type: int), _col3 (type: int) - Statistics: Num rows: 36 Data size: 3672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 1556 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -64,19 +64,19 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: varchar(10)), KEY._col2 (type: int), KEY._col3 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 18 Data size: 1836 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 1556 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: CASE WHEN (((_col3 = 3) and _col0 is not null)) THEN (1) ELSE (null) END (type: int), CASE WHEN (((_col3 = 5) and _col1 is not null)) THEN (1) ELSE (null) END (type: int), CASE WHEN (((_col3 = 6) and _col2 is not null)) THEN (1) ELSE (null) END (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 18 Data size: 1836 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 1556 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col0), count(_col1), count(_col2) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 126 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 126 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint) Reducer 3 Execution mode: llap @@ -85,10 +85,10 @@ STAGE PLANS: aggregations: count(VALUE._col0), count(VALUE._col1), count(VALUE._col2) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 126 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 126 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE table: 
input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -181,21 +181,21 @@ STAGE PLANS: Map Operator Tree: TableScan alias: employee - Statistics: Num rows: 12 Data size: 1224 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: gender (type: varchar(10)), department_id (type: int), education_level (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 12 Data size: 1224 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: varchar(10)), _col1 (type: int), _col2 (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 60 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 2534 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: varchar(10)), _col1 (type: int), _col2 (type: int), _col3 (type: int) sort order: ++++ Map-reduce partition columns: _col0 (type: varchar(10)), _col1 (type: int), _col2 (type: int), _col3 (type: int) - Statistics: Num rows: 60 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 2534 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -205,19 +205,19 @@ STAGE PLANS: keys: KEY._col0 (type: varchar(10)), KEY._col1 (type: int), KEY._col2 (type: int), KEY._col3 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 30 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 2534 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: CASE WHEN (((_col3 = 3) and _col0 is not null)) THEN (1) ELSE (null) END (type: int), CASE WHEN (((_col3 = 5) and _col1 is not null)) THEN (1) ELSE (null) END (type: int), CASE WHEN (((_col3 = 6) and _col2 is not null)) THEN (1) ELSE (null) END (type: int), CASE WHEN ((_col3 = 4)) THEN (1) ELSE (null) END (type: int), CASE WHEN ((_col3 = 0)) THEN (1) ELSE (null) END (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 30 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 2534 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col0), count(_col1), count(_col2), count(_col3), count(_col4) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 142 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 142 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint) Reducer 3 Execution mode: llap @@ -226,14 +226,14 @@ STAGE PLANS: aggregations: count(VALUE._col0), count(VALUE._col1), count(VALUE._col2), count(VALUE._col3), count(VALUE._col4) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 142 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 
Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: bigint), _col1 (type: bigint), _col0 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col3 (type: bigint), _col4 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 1 Data size: 142 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 142 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/multi_insert_lateral_view.q.out ql/src/test/results/clientpositive/llap/multi_insert_lateral_view.q.out index df92c0004f..a00a7bce7b 100644 --- ql/src/test/results/clientpositive/llap/multi_insert_lateral_view.q.out +++ ql/src/test/results/clientpositive/llap/multi_insert_lateral_view.q.out @@ -56,6 +56,10 @@ STAGE PLANS: Stage: Stage-2 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -83,6 +87,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1684 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1684 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Select Operator expressions: array((key + 1),(key + 2)) (type: array) outputColumnNames: _col0 @@ -105,6 +122,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1684 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1684 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Lateral View Forward Statistics: Num rows: 10 Data size: 1840 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -126,6 +156,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 20 Data size: 3680 Basic 
stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1684 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1684 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Select Operator expressions: array((key + 3),(key + 4)) (type: array) outputColumnNames: _col0 @@ -148,8 +191,51 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1684 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1684 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1684 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1684 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1684 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1684 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -167,6 +253,10 @@ STAGE PLANS: Stage: Stage-4 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv1 Stage: Stage-1 Move Operator @@ -181,6 +271,10 @@ STAGE PLANS: Stage: Stage-5 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv2 PREHOOK: query: from src_10 insert overwrite table src_lv1 select key, C lateral view explode(array(key+1, key+2)) A as C @@ -280,7 +374,9 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -397,10 +493,38 @@ STAGE PLANS: output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 1840 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1684 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1684 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1684 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1684 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial @@ -418,6 +542,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 1840 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1684 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1684 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1684 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1684 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -435,6 +587,10 @@ STAGE PLANS: Stage: Stage-4 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv1 Stage: Stage-1 Move Operator @@ -449,6 +605,10 @@ STAGE PLANS: Stage: Stage-5 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv2 PREHOOK: query: from src_10 insert overwrite table src_lv1 select key, sum(C) lateral view explode(array(key+1, key+2)) A as C group by key @@ -532,7 +692,10 @@ STAGE PLANS: #### A masked pattern was here #### 
Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -615,9 +778,37 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 3 Execution mode: llap Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: Forward Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -641,6 +832,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: (KEY._col0 < 200) (type: boolean) Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE @@ -662,6 +866,49 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: 
struct), _col1 (type: struct) + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-4 Dependency Collection @@ -679,6 +926,10 @@ STAGE PLANS: Stage: Stage-5 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv1 Stage: Stage-1 Move Operator @@ -693,6 +944,10 @@ STAGE PLANS: Stage: Stage-6 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv2 Stage: Stage-2 Move Operator @@ -707,6 +962,10 @@ STAGE PLANS: Stage: Stage-7 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv3 PREHOOK: query: from src_10 insert overwrite table src_lv1 select key, sum(C) lateral view explode(array(key+1, key+2)) A as C group by key @@ -804,8 +1063,11 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) Reducer 4 <- Map 1 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Map 1 (SIMPLE_EDGE) + Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -933,10 +1195,38 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: 
false + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator aggregations: sum(DISTINCT KEY._col1:0._col0) keys: KEY._col0 (type: double) mode: mergepartial @@ -954,7 +1244,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv2 - Reducer 4 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -975,6 +1293,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 5 Data size: 1840 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1256 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1256 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1256 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1256 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-4 Dependency Collection @@ -992,6 +1338,10 @@ STAGE PLANS: Stage: Stage-5 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: 
string, string + Table: default.src_lv1 Stage: Stage-1 Move Operator @@ -1006,6 +1356,10 @@ STAGE PLANS: Stage: Stage-6 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv2 Stage: Stage-2 Move Operator @@ -1020,6 +1374,10 @@ STAGE PLANS: Stage: Stage-7 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv3 PREHOOK: query: from src_10 insert overwrite table src_lv1 select C, sum(distinct key) lateral view explode(array(key+1, key+2)) A as C group by C @@ -1159,8 +1517,12 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) Reducer 4 <- Map 1 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Map 1 (SIMPLE_EDGE) + Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) + Reducer 8 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1281,10 +1643,38 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator aggregations: sum(DISTINCT KEY._col1:0._col0) keys: KEY._col0 (type: string) mode: mergepartial @@ -1302,7 +1692,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv2 - Reducer 4 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: 
compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 Execution mode: llap Reduce Operator Tree: Forward @@ -1328,6 +1746,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: (KEY._col1:0._col0 < 200) (type: boolean) Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE @@ -1349,6 +1780,49 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv4 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 8 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-5 Dependency Collection @@ -1366,6 +1840,10 @@ STAGE PLANS: Stage: Stage-6 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv1 Stage: Stage-1 Move Operator @@ -1380,6 +1858,10 @@ STAGE PLANS: Stage: Stage-7 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv2 Stage: Stage-2 Move Operator @@ -1394,6 +1876,10 @@ STAGE PLANS: Stage: Stage-8 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv3 Stage: Stage-3 Move Operator @@ -1408,6 +1894,10 @@ STAGE PLANS: Stage: Stage-9 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv4 PREHOOK: query: from src_10 insert overwrite table src_lv1 select key, sum(distinct C) lateral view explode(array(key+1, key+2)) A as C group by key diff --git ql/src/test/results/clientpositive/llap/optimize_join_ptp.q.out ql/src/test/results/clientpositive/llap/optimize_join_ptp.q.out index 5dbdeb4e73..01a3c7e93c 100644 --- ql/src/test/results/clientpositive/llap/optimize_join_ptp.q.out +++ ql/src/test/results/clientpositive/llap/optimize_join_ptp.q.out @@ -52,19 +52,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 285 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (k < 15) (type: boolean) - Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: v (type: string), k (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: UDFToDouble(_col1) (type: double) sort order: + Map-reduce partition columns: UDFToDouble(_col1) (type: double) - Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: int) Execution mode: llap LLAP IO: no inputs @@ -72,24 +72,24 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 3 Data size: 576 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 297 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((v = 'people') and k is not null) (type: boolean) - Statistics: Num rows: 3 Data size: 576 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 99 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: k (type: double) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 576 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: double) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 576 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double) sort order: + Map-reduce partition 
columns: _col0 (type: double) - Statistics: Num rows: 3 Data size: 576 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -102,10 +102,10 @@ STAGE PLANS: 0 UDFToDouble(_col1) (type: double) 1 _col0 (type: double) outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 620 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 3 Data size: 620 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/orc_analyze.q.out ql/src/test/results/clientpositive/llap/orc_analyze.q.out index 4e4e3e3f77..fd33bfd408 100644 --- ql/src/test/results/clientpositive/llap/orc_analyze.q.out +++ ql/src/test/results/clientpositive/llap/orc_analyze.q.out @@ -230,7 +230,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"address\":\"true\",\"first_name\":\"true\",\"id\":\"true\",\"last_name\":\"true\",\"salary\":\"true\",\"start_date\":\"true\",\"state\":\"true\"}} numFiles 1 numRows 100 rawDataSize 52600 @@ -569,7 +569,7 @@ Database: default Table: orc_create_people #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"address\":\"true\",\"first_name\":\"true\",\"id\":\"true\",\"last_name\":\"true\",\"salary\":\"true\",\"start_date\":\"true\"}} numFiles 1 numRows 50 rawDataSize 21950 @@ -610,7 +610,7 @@ Database: default Table: orc_create_people #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"address\":\"true\",\"first_name\":\"true\",\"id\":\"true\",\"last_name\":\"true\",\"salary\":\"true\",\"start_date\":\"true\"}} numFiles 1 numRows 50 rawDataSize 22050 @@ -961,7 +961,7 @@ Database: default Table: orc_create_people #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"address\":\"true\",\"first_name\":\"true\",\"id\":\"true\",\"last_name\":\"true\",\"salary\":\"true\",\"start_date\":\"true\"}} numFiles 4 numRows 50 rawDataSize 21975 @@ -1002,7 +1002,7 @@ Database: default Table: orc_create_people #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"address\":\"true\",\"first_name\":\"true\",\"id\":\"true\",\"last_name\":\"true\",\"salary\":\"true\",\"start_date\":\"true\"}} numFiles 4 numRows 50 rawDataSize 22043 diff --git ql/src/test/results/clientpositive/llap/orc_merge1.q.out ql/src/test/results/clientpositive/llap/orc_merge1.q.out index 5669a5baf4..64b846043d 100644 --- ql/src/test/results/clientpositive/llap/orc_merge1.q.out +++ ql/src/test/results/clientpositive/llap/orc_merge1.q.out @@ -60,6 +60,9 @@ STAGE PLANS: 
Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -78,8 +81,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -100,6 +139,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1 PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part) SELECT key, value, PMOD(HASH(key), 2) as part @@ -145,6 +188,9 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -163,8 +209,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1b + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key 
expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -194,6 +276,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1b Stage: Stage-4 Tez @@ -275,6 +361,9 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -293,8 +382,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1c + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 
250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -324,6 +449,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1c Stage: Stage-4 Tez diff --git ql/src/test/results/clientpositive/llap/orc_merge10.q.out ql/src/test/results/clientpositive/llap/orc_merge10.q.out index 4671e477e0..77b4325a0a 100644 --- ql/src/test/results/clientpositive/llap/orc_merge10.q.out +++ ql/src/test/results/clientpositive/llap/orc_merge10.q.out @@ -60,6 +60,9 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -78,8 +81,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -100,6 +139,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1 PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part) SELECT key, value, PMOD(HASH(key), 2) as part @@ -145,6 +188,9 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A 
masked pattern was here ####
       Vertices:
         Map 1
             Map Operator Tree:
@@ -163,8 +209,44 @@ STAGE PLANS:
                            output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
                            serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
                            name: default.orcfile_merge1b
+                    Select Operator
+                      expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string)
+                      outputColumnNames: key, value, ds, part
+                      Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE
+                      Group By Operator
+                        aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                        keys: ds (type: string), part (type: string)
+                        mode: hash
+                        outputColumnNames: _col0, _col1, _col2, _col3
+                        Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: string), _col1 (type: string)
+                          sort order: ++
+                          Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                          Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE
+                          value expressions: _col2 (type: struct), _col3 (type: struct)
             Execution mode: llap
             LLAP IO: no inputs
+        Reducer 2
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-8
     Conditional Operator
@@ -194,6 +276,10 @@ STAGE PLANS:
   Stage: Stage-3
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.orcfile_merge1b
 
   Stage: Stage-4
     Tez
@@ -275,6 +361,9 @@ STAGE PLANS:
   Stage: Stage-1
     Tez
 #### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
       Vertices:
         Map 1
             Map Operator Tree:
@@ -293,8 +382,44 @@ STAGE PLANS:
                            output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
                            serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
                            name: default.orcfile_merge1c
+                    Select Operator
+                      expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string)
+                      outputColumnNames: key, value, ds, part
+                      Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE
+                      Group By Operator
+                        aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                        keys: ds (type: string), part (type: string)
+                        mode: hash
+                        outputColumnNames: _col0, _col1, _col2, _col3
+                        Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: string), _col1 (type: string)
+                          sort order: ++
+                          Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                          Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE
+                          value expressions: _col2 (type: struct), _col3 (type: struct)
             Execution mode: llap
             LLAP IO: no inputs
+        Reducer 2
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-8
     Conditional Operator
@@ -324,6 +449,10 @@ STAGE PLANS:
   Stage: Stage-3
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.orcfile_merge1c
 
   Stage: Stage-4
     Tez
diff --git ql/src/test/results/clientpositive/llap/orc_merge2.q.out ql/src/test/results/clientpositive/llap/orc_merge2.q.out
index 53a64248a3..658c78901c 100644
--- ql/src/test/results/clientpositive/llap/orc_merge2.q.out
+++ ql/src/test/results/clientpositive/llap/orc_merge2.q.out
@@ -34,6 +34,9 @@ STAGE PLANS:
   Stage: Stage-1
     Tez
 #### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
       Vertices:
         Map 1
             Map Operator Tree:
@@ -52,8 +55,44 @@ STAGE PLANS:
                            output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
                            serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
                            name: default.orcfile_merge2a
+                    Select Operator
+                      expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string), UDFToString(_col3) (type: string)
+                      outputColumnNames: key, value, one, two, three
+                      Statistics: Num rows: 500 Data size: 274000 Basic stats: COMPLETE Column stats: COMPLETE
+                      Group By Operator
+                        aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                        keys: one (type: string), two (type: string), three (type: string)
+                        mode: hash
+                        outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                        Statistics: Num rows: 500 Data size: 658500 Basic stats: COMPLETE Column stats: COMPLETE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+                          sort order: +++
+                          Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+                          Statistics: Num rows: 500 Data size: 658500 Basic stats: COMPLETE Column stats: COMPLETE
+                          value expressions: _col3 (type: struct), _col4 (type: struct)
             Execution mode: llap
             LLAP IO: no inputs
+        Reducer 2
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                Statistics: Num rows: 500 Data size: 666500 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col3 (type: struct), _col4 (type: struct), _col0 (type: string), _col1 (type: string), _col2 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                  Statistics: Num rows: 500 Data size: 666500 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 500 Data size: 666500 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-2
     Dependency Collection
@@ -75,6 +114,10 @@ STAGE PLANS:
   Stage: Stage-3
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.orcfile_merge2a
 
 PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge2a PARTITION (one='1', two, three)
     SELECT key, value, PMOD(HASH(key), 10) as two,
diff --git ql/src/test/results/clientpositive/llap/orc_merge3.q.out ql/src/test/results/clientpositive/llap/orc_merge3.q.out
index dca37ecd2b..122c6edb2d 100644
--- ql/src/test/results/clientpositive/llap/orc_merge3.q.out
+++ ql/src/test/results/clientpositive/llap/orc_merge3.q.out
@@ -64,26 +64,57 @@ STAGE PLANS:
   Stage: Stage-1
     Tez
 #### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
       Vertices:
         Map 1
             Map Operator Tree:
                 TableScan
                   alias: orcfile_merge3a
-                  Statistics: Num rows: 1000 Data size: 189412 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE
                   Select Operator
                     expressions: key (type: int), value (type: string)
                     outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 1000 Data size: 189412 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE
                     File Output Operator
                       compressed: false
-                      Statistics: Num rows: 1000 Data size: 189412 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE
                       table:
                           input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                           output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
                           serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
                           name: default.orcfile_merge3b
+                    Select Operator
+                      expressions: _col0 (type: int), _col1 (type: string)
+                      outputColumnNames: key, value
+                      Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE
+                      Group By Operator
+                        aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE
+                          value expressions: _col0 (type: struct), _col1 (type: struct)
             Execution mode: llap
             LLAP IO: no inputs
+        Reducer 2
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-2
     Dependency Collection
@@ -101,6 +132,10 @@ STAGE PLANS:
   Stage: Stage-3
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.orcfile_merge3b
 
 PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge3b
     SELECT key, value FROM orcfile_merge3a
diff --git ql/src/test/results/clientpositive/llap/orc_merge4.q.out ql/src/test/results/clientpositive/llap/orc_merge4.q.out
index a2aa416681..bf3e3a9249 100644
--- ql/src/test/results/clientpositive/llap/orc_merge4.q.out
+++ ql/src/test/results/clientpositive/llap/orc_merge4.q.out
@@ -82,26 +82,57 @@ STAGE PLANS:
   Stage: Stage-1
     Tez
 #### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
       Vertices:
         Map 1
             Map Operator Tree:
                 TableScan
                   alias: orcfile_merge3a
-                  Statistics: Num rows: 1000 Data size: 272788 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE
                   Select Operator
                     expressions: key (type: int), value (type: string)
                     outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 1000 Data size: 272788 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE
                     File Output Operator
                       compressed: false
-                      Statistics: Num rows: 1000 Data size: 272788 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE
                       table:
                           input format: org.apache.hadoop.mapred.TextInputFormat
                           output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                           serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                           name: default.orcfile_merge3b
+                    Select Operator
+                      expressions: _col0 (type: int), _col1 (type: string)
+                      outputColumnNames: key, value
+                      Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE
+                      Group By Operator
+                        aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE
+                          value expressions: _col0 (type: struct), _col1 (type: struct)
             Execution mode: llap
             LLAP IO: all inputs
+        Reducer 2
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-2
     Dependency Collection
@@ -119,6 +150,10 @@ STAGE PLANS:
   Stage: Stage-3
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.orcfile_merge3b
 
 PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge3b
     SELECT key, value FROM orcfile_merge3a
diff --git ql/src/test/results/clientpositive/llap/orc_merge5.q.out ql/src/test/results/clientpositive/llap/orc_merge5.q.out
index fafba53f84..6eba365ff2 100644
--- ql/src/test/results/clientpositive/llap/orc_merge5.q.out
+++ ql/src/test/results/clientpositive/llap/orc_merge5.q.out
@@ -36,6 +36,9 @@ STAGE PLANS:
   Stage: Stage-1
     Tez
 #### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
       Vertices:
         Map 1
             Map Operator Tree:
@@ -58,8 +61,36 @@ STAGE PLANS:
                            output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
                            serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
                            name: default.orc_merge5b
+                    Select Operator
+                      expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp)
+                      outputColumnNames: userid, string1, subtype, decimal1, ts
+                      Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(userid, 'hll'), compute_stats(string1, 'hll'), compute_stats(subtype, 'hll'), compute_stats(decimal1, 'hll'), compute_stats(ts, 'hll')
+                        mode: hash
+                        outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                        Statistics: Num rows: 1 Data size: 2696 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 2696 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct)
             Execution mode: llap
             LLAP IO: all inputs
+        Reducer 2
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                Statistics: Num rows: 1 Data size: 2760 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 2760 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-2
     Dependency Collection
@@ -77,6 +108,10 @@ STAGE PLANS:
   Stage: Stage-3
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: userid, string1, subtype, decimal1, ts
+          Column Types: bigint, string, double, decimal(10,0), timestamp
+          Table: default.orc_merge5b
 
 PREHOOK: query: insert overwrite table orc_merge5b select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13
 PREHOOK: type: QUERY
@@ -131,6 +166,9 @@ STAGE PLANS:
   Stage: Stage-1
     Tez
 #### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
       Vertices:
         Map 1
             Map Operator Tree:
@@ -153,8 +191,36 @@ STAGE PLANS:
                            output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
                            serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
                            name: default.orc_merge5b
+                    Select Operator
+                      expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp)
+                      outputColumnNames: userid, string1, subtype, decimal1, ts
+                      Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(userid, 'hll'), compute_stats(string1, 'hll'), compute_stats(subtype, 'hll'), compute_stats(decimal1, 'hll'), compute_stats(ts, 'hll')
+                        mode: hash
+                        outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                        Statistics: Num rows: 1 Data size: 2696 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 2696 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct)
             Execution mode: llap
             LLAP IO: all inputs
+        Reducer 2
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                Statistics: Num rows: 1 Data size: 2760 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 2760 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-8
     Conditional Operator
@@ -181,6 +247,10 @@ STAGE PLANS:
   Stage: Stage-3
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: userid, string1, subtype, decimal1, ts
+          Column Types: bigint, string, double, decimal(10,0), timestamp
+          Table: default.orc_merge5b
 
   Stage: Stage-4
     Tez
diff --git ql/src/test/results/clientpositive/llap/orc_merge6.q.out ql/src/test/results/clientpositive/llap/orc_merge6.q.out
index e5672fd40a..1583007d7b 100644
--- ql/src/test/results/clientpositive/llap/orc_merge6.q.out
+++ ql/src/test/results/clientpositive/llap/orc_merge6.q.out
@@ -36,6 +36,9 @@ STAGE PLANS:
   Stage: Stage-1
     Tez
 #### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
       Vertices:
         Map 1
             Map Operator Tree:
@@ -58,8 +61,44 @@ STAGE PLANS:
                            output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
                            serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
                            name: default.orc_merge5a
+                    Select Operator
+                      expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp), '2000' (type: string), UDFToInteger('24') (type: int)
+                      outputColumnNames: userid, string1, subtype, decimal1, ts, year, hour
+                      Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(userid, 'hll'), compute_stats(string1, 'hll'), compute_stats(subtype, 'hll'), compute_stats(decimal1, 'hll'), compute_stats(ts, 'hll')
+                        keys: year (type: string), hour (type: int)
+                        mode: hash
+                        outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+                        Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: string), _col1 (type: int)
+                          sort order: ++
+                          Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
+                          Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct)
             Execution mode: llap
             LLAP IO: all inputs
+        Reducer 2
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4)
+                keys: KEY._col0 (type: string), KEY._col1 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+                Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col0 (type: string), _col1 (type: int)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+                  Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-2
     Dependency Collection
@@ -80,6 +119,10 @@ STAGE PLANS:
   Stage: Stage-3
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: userid, string1, subtype, decimal1, ts
+          Column Types: bigint, string, double, decimal(10,0), timestamp
+          Table: default.orc_merge5a
 
 PREHOOK: query: insert overwrite table orc_merge5a partition (year="2000",hour=24) select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13
 PREHOOK: type: QUERY
@@ -176,6 +219,9 @@ STAGE PLANS:
   Stage: Stage-1
     Tez
 #### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
       Vertices:
         Map 1
             Map Operator Tree:
@@ -198,8 +244,44 @@ STAGE PLANS:
                            output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
                            serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
                            name: default.orc_merge5a
+                    Select Operator
+                      expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp), '2000' (type: string), UDFToInteger('24') (type: int)
+                      outputColumnNames: userid, string1, subtype, decimal1, ts, year, hour
+                      Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(userid, 'hll'), compute_stats(string1, 'hll'), compute_stats(subtype, 'hll'), compute_stats(decimal1, 'hll'), compute_stats(ts, 'hll')
+                        keys: year (type: string), hour (type: int)
+                        mode: hash
+                        outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+                        Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: string), _col1 (type: int)
+                          sort order: ++
+                          Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
+                          Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct)
             Execution mode: llap
             LLAP IO: all inputs
+        Reducer 2
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4)
+                keys: KEY._col0 (type: string), KEY._col1 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+                Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col0 (type: string), _col1 (type: int)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+                  Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-8
     Conditional Operator
@@ -229,6 +311,10 @@ STAGE PLANS:
   Stage: Stage-3
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: userid, string1, subtype, decimal1, ts
+          Column Types: bigint, string, double, decimal(10,0), timestamp
+          Table: default.orc_merge5a
 
   Stage: Stage-4
     Tez
diff --git ql/src/test/results/clientpositive/llap/orc_merge7.q.out ql/src/test/results/clientpositive/llap/orc_merge7.q.out
index f224822ec9..914672d89a 100644
--- ql/src/test/results/clientpositive/llap/orc_merge7.q.out
+++ ql/src/test/results/clientpositive/llap/orc_merge7.q.out
@@ -36,6 +36,9 @@ STAGE PLANS:
   Stage: Stage-1
     Tez
 #### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
       Vertices:
         Map 1
             Map Operator Tree:
@@ -54,8 +57,44 @@ STAGE PLANS:
                            output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
                            serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
                            name: default.orc_merge5a
+                    Select Operator
+                      expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp), _col5 (type: double)
+                      outputColumnNames: userid, string1, subtype, decimal1, ts, st
+                      Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(userid, 'hll'), compute_stats(string1, 'hll'), compute_stats(subtype, 'hll'), compute_stats(decimal1, 'hll'), compute_stats(ts, 'hll')
+                        keys: st (type: double)
+                        mode: hash
+                        outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                        Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: double)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: double)
+                          Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct)
             Execution mode: llap
             LLAP IO: all inputs
+        Reducer 2
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4)
+                keys: KEY._col0 (type: double)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: double)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                  Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-2
     Dependency Collection
@@ -75,6 +114,10 @@ STAGE PLANS:
   Stage: Stage-3
    Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: userid, string1, subtype, decimal1, ts
+          Column Types: bigint, string, double, decimal(10,0), timestamp
+          Table: default.orc_merge5a
 
 PREHOOK: query: insert overwrite table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5
 PREHOOK: type: QUERY
@@ -210,6 +253,9 @@ STAGE PLANS:
   Stage: Stage-1
     Tez
 #### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
       Vertices:
         Map 1
             Map Operator Tree:
@@ -228,8 +274,44 @@ STAGE PLANS:
                            output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
                            serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
                            name: default.orc_merge5a
+                    Select Operator
+                      expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp), _col5 (type: double)
+                      outputColumnNames: userid, string1, subtype, decimal1, ts, st
+                      Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(userid, 'hll'), compute_stats(string1, 'hll'), compute_stats(subtype, 'hll'), compute_stats(decimal1, 'hll'), compute_stats(ts, 'hll')
+                        keys: st (type: double)
+                        mode: hash
+                        outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                        Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: double)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: double)
+                          Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct)
             Execution mode: llap
             LLAP IO: all inputs
+        Reducer 2
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4)
+                keys: KEY._col0 (type: double)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: double)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                  Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-8
     Conditional Operator
@@ -258,6 +340,10 @@ STAGE PLANS:
   Stage: Stage-3
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: userid, string1, subtype, decimal1, ts
+          Column Types: bigint, string, double, decimal(10,0), timestamp
+          Table: default.orc_merge5a
 
   Stage: Stage-4
     Tez
diff --git ql/src/test/results/clientpositive/llap/orc_merge_diff_fs.q.out ql/src/test/results/clientpositive/llap/orc_merge_diff_fs.q.out
index 5669a5baf4..64b846043d 100644
--- ql/src/test/results/clientpositive/llap/orc_merge_diff_fs.q.out
+++ ql/src/test/results/clientpositive/llap/orc_merge_diff_fs.q.out
@@ -60,6 +60,9 @@ STAGE PLANS:
   Stage: Stage-1
     Tez
 #### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
       Vertices:
         Map 1
             Map Operator Tree:
@@ -78,8 +81,44 @@ STAGE PLANS:
                            output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
                            serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
                            name: default.orcfile_merge1
+                    Select Operator
+                      expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string)
+                      outputColumnNames: key, value, ds, part
+                      Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE
+                      Group By Operator
+                        aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                        keys: ds (type: string), part (type: string)
+                        mode: hash
+                        outputColumnNames: _col0, _col1, _col2, _col3
+                        Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: string), _col1 (type: string)
+                          sort order: ++
+                          Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                          Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE
+                          value expressions: _col2 (type: struct), _col3 (type: struct)
             Execution mode: llap
             LLAP IO: no inputs
+        Reducer 2
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-2
     Dependency Collection
@@ -100,6 +139,10 @@ STAGE PLANS:
   Stage: Stage-3
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.orcfile_merge1
 
 PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part)
     SELECT key, value, PMOD(HASH(key), 2) as part
@@ -145,6 +188,9 @@ STAGE PLANS:
   Stage: Stage-1
     Tez
 #### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
       Vertices:
         Map 1
             Map Operator Tree:
@@ -163,8 +209,44 @@ STAGE PLANS:
                            output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
                            serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
                            name: default.orcfile_merge1b
+                    Select Operator
+                      expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string)
+                      outputColumnNames: key, value, ds, part
+                      Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE
+                      Group By Operator
+                        aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                        keys: ds (type: string), part (type: string)
+                        mode: hash
+                        outputColumnNames: _col0, _col1, _col2, _col3
+                        Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: string), _col1 (type: string)
+                          sort order: ++
+                          Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                          Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE
+                          value expressions: _col2 (type: struct), _col3 (type: struct)
             Execution mode: llap
             LLAP IO: no inputs
+        Reducer 2
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-8
     Conditional Operator
@@ -194,6 +276,10 @@ STAGE PLANS:
   Stage: Stage-3
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.orcfile_merge1b
 
   Stage: Stage-4
     Tez
@@ -275,6 +361,9 @@ STAGE PLANS:
   Stage: Stage-1
     Tez
 #### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
       Vertices:
         Map 1
             Map Operator Tree:
@@ -293,8 +382,44 @@ STAGE PLANS:
                            output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
                            serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
                            name: default.orcfile_merge1c
+                    Select Operator
+                      expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string)
+                      outputColumnNames: key, value, ds, part
+                      Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE
+                      Group By Operator
+                        aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                        keys: ds (type: string), part (type: string)
+                        mode: hash
+                        outputColumnNames: _col0, _col1, _col2, _col3
+                        Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: string), _col1 (type: string)
+                          sort order: ++
+                          Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                          Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE
+                          value expressions: _col2 (type: struct), _col3 (type: struct)
             Execution mode: llap
             LLAP IO: no inputs
+        Reducer 2
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-8
     Conditional Operator
@@ -324,6 +449,10 @@ STAGE PLANS:
   Stage: Stage-3
     Stats Work
      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.orcfile_merge1c
 
   Stage: Stage-4
     Tez
diff --git ql/src/test/results/clientpositive/llap/orc_merge_incompat1.q.out ql/src/test/results/clientpositive/llap/orc_merge_incompat1.q.out
index ae7250a6c0..0e9b34f8ec 100644
--- ql/src/test/results/clientpositive/llap/orc_merge_incompat1.q.out
+++ ql/src/test/results/clientpositive/llap/orc_merge_incompat1.q.out
@@ -36,6 +36,9 @@ STAGE PLANS:
   Stage: Stage-1
     Tez
 #### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
       Vertices:
         Map 1
             Map Operator Tree:
@@ -57,8 +60,36 @@ STAGE PLANS:
                            output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
                            serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
                            name: default.orc_merge5b
+                    Select Operator
+                      expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp)
+                      outputColumnNames: userid, string1, subtype, decimal1, ts
+                      Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(userid, 'hll'), compute_stats(string1, 'hll'), compute_stats(subtype, 'hll'), compute_stats(decimal1, 'hll'), compute_stats(ts, 'hll')
+                        mode: hash
+                        outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                        Statistics: Num rows: 1 Data size: 2696 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 2696 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct)
             Execution mode: llap
             LLAP IO: all inputs
+        Reducer 2
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                Statistics: Num rows: 1 Data size: 2760 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 2760 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-2
     Dependency Collection
@@ -76,6 +107,10 @@ STAGE PLANS:
   Stage: Stage-3
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: userid, string1, subtype, decimal1, ts
+          Column Types: bigint, string, double, decimal(10,0), timestamp
+          Table: default.orc_merge5b
 
 PREHOOK: query: insert overwrite table orc_merge5b select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13
 PREHOOK: type: QUERY
diff --git ql/src/test/results/clientpositive/llap/orc_merge_incompat2.q.out ql/src/test/results/clientpositive/llap/orc_merge_incompat2.q.out
index c1822a17fc..7be4ffab09 100644
--- ql/src/test/results/clientpositive/llap/orc_merge_incompat2.q.out
+++ ql/src/test/results/clientpositive/llap/orc_merge_incompat2.q.out
@@ -36,6 +36,9 @@ STAGE PLANS:
   Stage: Stage-1
     Tez
 #### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
       Vertices:
         Map 1
             Map Operator Tree:
@@ -54,8 +57,44 @@ STAGE PLANS:
                            output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
                            serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
                            name: default.orc_merge5a
+                    Select Operator
+                      expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp), _col5 (type: double)
+                      outputColumnNames: userid, string1, subtype, decimal1, ts, st
+                      Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(userid, 'hll'), compute_stats(string1, 'hll'), compute_stats(subtype, 'hll'), compute_stats(decimal1, 'hll'), compute_stats(ts, 'hll')
+                        keys: st (type: double)
+                        mode: hash
+                        outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                        Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: double)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: double)
+                          Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct)
             Execution mode: llap
             LLAP IO: all inputs
+        Reducer 2
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4)
+                keys: KEY._col0 (type: double)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: double)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                  Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-2
     Dependency Collection
@@ -75,6 +114,10 @@ STAGE PLANS:
   Stage: Stage-3
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: userid, string1, subtype, decimal1, ts
+          Column Types: bigint, string, double, decimal(10,0), timestamp
+          Table: default.orc_merge5a
 
 PREHOOK: query: insert overwrite table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5 order by userid
 PREHOOK: type: QUERY
diff --git ql/src/test/results/clientpositive/llap/orc_predicate_pushdown.q.out ql/src/test/results/clientpositive/llap/orc_predicate_pushdown.q.out
index d93f604d7a..9f20f77cb0 100644
--- ql/src/test/results/clientpositive/llap/orc_predicate_pushdown.q.out
+++ ql/src/test/results/clientpositive/llap/orc_predicate_pushdown.q.out
@@ -133,19 +133,19 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: orc_pred
-                  Statistics: Num rows: 1049 Data size: 3992 Basic stats: COMPLETE Column stats: NONE
rows: 1049 Data size: 4188 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hash(t) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1049 Data size: 3992 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 4188 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: all inputs @@ -156,10 +156,10 @@ STAGE PLANS: aggregations: sum(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -191,19 +191,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: orc_pred - Statistics: Num rows: 1049 Data size: 3992 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 4188 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hash(t) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1049 Data size: 3992 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 4188 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: all inputs @@ -214,10 +214,10 @@ STAGE PLANS: aggregations: sum(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -321,22 +321,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: orc_pred - Statistics: Num rows: 1049 Data size: 3992 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 4188 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((UDFToInteger(t) > -2) and (t < 0)) (type: boolean) - Statistics: Num rows: 116 Data size: 441 Basic stats: COMPLETE Column stats: NONE + 
Statistics: Num rows: 116 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hash(t) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 116 Data size: 441 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 116 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: all inputs @@ -347,10 +347,10 @@ STAGE PLANS: aggregations: sum(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -389,22 +389,22 @@ STAGE PLANS: TableScan alias: orc_pred filterExpr: ((t < 0) and (UDFToInteger(t) > -2)) (type: boolean) - Statistics: Num rows: 1049 Data size: 3992 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 4188 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((UDFToInteger(t) > -2) and (t < 0)) (type: boolean) - Statistics: Num rows: 116 Data size: 441 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 116 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hash(t) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 116 Data size: 441 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 116 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: all inputs @@ -415,10 +415,10 @@ STAGE PLANS: aggregations: sum(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -589,18 +589,18 @@ STAGE PLANS: Map Operator Tree: TableScan alias: orc_pred - Statistics: Num rows: 1049 Data size: 187624 
Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 105941 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((not (t) IN (-1, -2, -3)) and (s like 'bob%') and s is not null and t BETWEEN 25 AND 30) (type: boolean) - Statistics: Num rows: 54 Data size: 9658 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 56 Data size: 5656 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), s (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 54 Data size: 9658 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 56 Data size: 5656 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: string) sort order: ++ - Statistics: Num rows: 54 Data size: 9658 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 56 Data size: 5656 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -609,10 +609,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 54 Data size: 9658 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 56 Data size: 5656 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 54 Data size: 9658 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 56 Data size: 5656 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -655,18 +655,18 @@ STAGE PLANS: TableScan alias: orc_pred filterExpr: (s is not null and (s like 'bob%') and (not (t) IN (-1, -2, -3)) and t BETWEEN 25 AND 30) (type: boolean) - Statistics: Num rows: 1049 Data size: 187624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 105941 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((not (t) IN (-1, -2, -3)) and (s like 'bob%') and s is not null and t BETWEEN 25 AND 30) (type: boolean) - Statistics: Num rows: 54 Data size: 9658 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 56 Data size: 5656 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), s (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 54 Data size: 9658 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 56 Data size: 5656 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: string) sort order: ++ - Statistics: Num rows: 54 Data size: 9658 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 56 Data size: 5656 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -675,10 +675,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 54 Data size: 9658 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 56 Data size: 5656 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 54 Data size: 9658 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 56 Data size: 5656 Basic stats: COMPLETE Column stats: COMPLETE table: input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -786,18 +786,18 @@ STAGE PLANS: Map Operator Tree: TableScan alias: orc_pred - Statistics: Num rows: 1049 Data size: 199600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 118521 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((d < 12.0) and (d >= 10.0) and (not (s like '%car%')) and (s like '%son') and (t > 0) and UDFToInteger(si) BETWEEN 300 AND 400) (type: boolean) - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Execution mode: llap @@ -808,13 +808,13 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -865,18 +865,18 @@ STAGE PLANS: TableScan alias: orc_pred filterExpr: ((d >= 10.0) and (d < 12.0) and (s like '%son') and (t > 0) and UDFToInteger(si) BETWEEN 300 AND 400 and (not (s like '%car%'))) (type: boolean) - Statistics: Num rows: 1049 Data size: 199600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 118521 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((d < 12.0) and (d >= 10.0) and (not (s like '%car%')) and (s like '%son') and (t > 0) and UDFToInteger(si) BETWEEN 300 AND 400) (type: boolean) - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE 
Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Execution mode: llap @@ -887,13 +887,13 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1010,18 +1010,18 @@ STAGE PLANS: Map Operator Tree: TableScan alias: orc_pred - Statistics: Num rows: 1049 Data size: 199600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 118521 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((d < 12.0) and (d >= 10) and (not (s like '%car%')) and (s like '%son') and (t <> 101) and (t > 0) and (t > 10) and si BETWEEN 300 AND 400) (type: boolean) - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Execution mode: llap @@ -1032,14 +1032,14 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Reducer 3 @@ -1048,13 +1048,13 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), 
VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1110,18 +1110,18 @@ STAGE PLANS: TableScan alias: orc_pred filterExpr: ((t > 10) and (t <> 101) and (d >= 10) and (d < 12.0) and (s like '%son') and (not (s like '%car%')) and (t > 0) and si BETWEEN 300 AND 400) (type: boolean) - Statistics: Num rows: 1049 Data size: 199600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 118521 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((d < 12.0) and (d >= 10) and (not (s like '%car%')) and (s like '%son') and (t <> 101) and (t > 0) and (t > 10) and si BETWEEN 300 AND 400) (type: boolean) - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Execution mode: llap @@ -1132,14 +1132,14 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Reducer 3 @@ -1148,13 +1148,13 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 190 Basic 
stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/parallel.q.out ql/src/test/results/clientpositive/llap/parallel.q.out index 3beb3400d7..4ac3fbb2f5 100644 --- ql/src/test/results/clientpositive/llap/parallel.q.out +++ ql/src/test/results/clientpositive/llap/parallel.q.out @@ -39,6 +39,8 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -93,6 +95,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_a + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete @@ -106,6 +121,49 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_b + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: llap + Reduce 
Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -123,6 +181,10 @@ STAGE PLANS: Stage: Stage-4 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_a Stage: Stage-1 Move Operator @@ -137,6 +199,10 @@ STAGE PLANS: Stage: Stage-5 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_b PREHOOK: query: from (select key, value from src group by key, value) s insert overwrite table src_a select s.key, s.value group by s.key, s.value diff --git ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out index 9ceb438a7f..6885346539 100644 --- ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out +++ ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out @@ -125,19 +125,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tbl_pred - Statistics: Num rows: 1049 Data size: 3992 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 4188 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hash(t) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1049 Data size: 3992 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 4188 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: all inputs (cache only) @@ -148,10 +148,10 @@ STAGE PLANS: aggregations: sum(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -183,19 +183,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tbl_pred - Statistics: Num rows: 1049 Data size: 3992 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 4188 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hash(t) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1049 Data size: 3992 Basic stats: 
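-- Every hunk of the kind shown for parallel.q.out above follows from the new
-- hive.stats.column.autogather default: each INSERT grows a trailing
-- compute_stats(<col>, 'hll') branch (a hash-mode Group By feeding a
-- mergepartial reducer) and each Stats Work stage gains a Column Stats Desc.
-- A minimal sketch that reproduces the plan shape in a Hive session (dest_a
-- and dest_b are hypothetical tables; src is the stock test table):
SET hive.stats.column.autogather=true;
CREATE TABLE dest_a (key STRING, value STRING);
CREATE TABLE dest_b (key STRING, value STRING);
EXPLAIN
FROM (SELECT key, value FROM src GROUP BY key, value) s
INSERT OVERWRITE TABLE dest_a SELECT s.key, s.value
INSERT OVERWRITE TABLE dest_b SELECT s.key, s.value;
-- Expect one extra stats reducer per target table and, under each Stats Work
-- stage, "Column Stats Desc: Columns: key, value".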
COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 4188 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: all inputs (cache only) @@ -206,10 +206,10 @@ STAGE PLANS: aggregations: sum(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -527,18 +527,18 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tbl_pred - Statistics: Num rows: 1049 Data size: 187624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 105941 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((not (t) IN (-1, -2, -3)) and (s like 'bob%') and s is not null and t BETWEEN 25 AND 30) (type: boolean) - Statistics: Num rows: 54 Data size: 9658 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 56 Data size: 5656 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), s (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 54 Data size: 9658 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 56 Data size: 5656 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: string) sort order: ++ - Statistics: Num rows: 54 Data size: 9658 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 56 Data size: 5656 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs (cache only) Reducer 2 @@ -547,10 +547,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 54 Data size: 9658 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 56 Data size: 5656 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 54 Data size: 9658 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 56 Data size: 5656 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -593,18 +593,18 @@ STAGE PLANS: TableScan alias: tbl_pred filterExpr: (s is not null and (s like 'bob%') and (not (t) IN (-1, -2, -3)) and t BETWEEN 25 AND 30) (type: boolean) - Statistics: Num rows: 1049 Data size: 187624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 105941 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((not (t) IN (-1, -2, -3)) 
and (s like 'bob%') and s is not null and t BETWEEN 25 AND 30) (type: boolean) - Statistics: Num rows: 54 Data size: 9658 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 56 Data size: 5656 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), s (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 54 Data size: 9658 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 56 Data size: 5656 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: string) sort order: ++ - Statistics: Num rows: 54 Data size: 9658 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 56 Data size: 5656 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs (cache only) Reducer 2 @@ -613,10 +613,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 54 Data size: 9658 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 56 Data size: 5656 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 54 Data size: 9658 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 56 Data size: 5656 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -724,18 +724,18 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tbl_pred - Statistics: Num rows: 1049 Data size: 199600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 118521 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((d < 12.0) and (d >= 10.0) and (not (s like '%car%')) and (s like '%son') and (t > 0) and UDFToInteger(si) BETWEEN 300 AND 400) (type: boolean) - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Execution mode: llap @@ -746,13 +746,13 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: 
NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -803,18 +803,18 @@ STAGE PLANS: TableScan alias: tbl_pred filterExpr: ((d >= 10.0) and (d < 12.0) and (s like '%son') and (t > 0) and UDFToInteger(si) BETWEEN 300 AND 400 and (not (s like '%car%'))) (type: boolean) - Statistics: Num rows: 1049 Data size: 199600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 118521 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((d < 12.0) and (d >= 10.0) and (not (s like '%car%')) and (s like '%son') and (t > 0) and UDFToInteger(si) BETWEEN 300 AND 400) (type: boolean) - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Execution mode: llap @@ -825,13 +825,13 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -993,18 +993,18 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tbl_pred - Statistics: Num rows: 1049 Data size: 199600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 118521 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((d < 12.0) and (d >= 10) and (not (s like '%car%')) and (s like '%son') and (t <> 101) and (t > 0) and (t > 10) and si BETWEEN 300 AND 400) (type: boolean) - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output 
Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Execution mode: llap @@ -1015,14 +1015,14 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Reducer 3 @@ -1031,13 +1031,13 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1093,18 +1093,18 @@ STAGE PLANS: TableScan alias: tbl_pred filterExpr: ((t > 10) and (t <> 101) and (d >= 10) and (d < 12.0) and (s like '%son') and (not (s like '%car%')) and (t > 0) and si BETWEEN 300 AND 400) (type: boolean) - Statistics: Num rows: 1049 Data size: 199600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 118521 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((d < 12.0) and (d >= 10) and (not (s like '%car%')) and (s like '%son') and (t <> 101) and (t > 0) and (t > 10) and si BETWEEN 300 AND 400) (type: boolean) - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic 
stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Execution mode: llap @@ -1115,14 +1115,14 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Reducer 3 @@ -1131,13 +1131,13 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1205,18 +1205,18 @@ STAGE PLANS: TableScan alias: tbl_pred filterExpr: ((f < 123.2) and (f > 1.92) and (f >= 9.99) and f BETWEEN 1.92 AND 123.2 and (i < 67627) and (i > 60627) and (i >= 60626) and i BETWEEN 60626 AND 67627 and (b < 4294967861) and (b > 4294967261) and (b >= 4294967260) and b BETWEEN 4294967261 AND 4294967861) (type: boolean) - Statistics: Num rows: 1049 Data size: 15968 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 16784 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((b < 4294967861) and (b > 4294967261) and (b >= 4294967260) and (f < 123.2) and (f > 1.92) and (f >= 9.99) and (i < 67627) and (i > 60627) and (i >= 60626) and b BETWEEN 4294967261 AND 4294967861 and f BETWEEN 1.92 AND 123.2 and i BETWEEN 60626 AND 67627) (type: boolean) - Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 608 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: f (type: float), i (type: int), b (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 608 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: float) sort order: - - Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 608 
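-- On the 1049 -> 38 row estimate directly above: with COMPLETE column stats
-- the annotator prices each range predicate instead of falling back to the
-- old 1-row guess. The figure is consistent with a 1/3 selectivity charged
-- once per filtered column, 1049 * (1/3)^3 ~= 38.9 across f, i and b; that
-- reading of the StatsRulesProcFactory defaults is inferred from the output
-- rather than a documented contract.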
Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: int), _col2 (type: bigint) Execution mode: llap @@ -1227,14 +1227,14 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: float), VALUE._col0 (type: int), VALUE._col1 (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 608 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: float) sort order: - - Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: int), _col2 (type: bigint) Reducer 3 @@ -1243,13 +1243,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: float), VALUE._col0 (type: int), VALUE._col1 (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/partition_pruning.q.out ql/src/test/results/clientpositive/llap/partition_pruning.q.out index c525ee7bd6..6060bf7fae 100644 --- ql/src/test/results/clientpositive/llap/partition_pruning.q.out +++ ql/src/test/results/clientpositive/llap/partition_pruning.q.out @@ -106,7 +106,7 @@ STAGE PLANS: partition values: dt 2001-01-01 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"customer":"true"}} bucket_count -1 column.name.delimiter , columns customer @@ -175,7 +175,7 @@ STAGE PLANS: partition values: dt 2001-01-01 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"customer":"true"}} bucket_count -1 column.name.delimiter , columns customer @@ -220,7 +220,7 @@ STAGE PLANS: partition values: dt 2001-01-03 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"customer":"true"}} bucket_count -1 column.name.delimiter , columns customer @@ -289,7 +289,7 @@ STAGE PLANS: partition values: dt 2001-01-01 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"customer":"true"}} bucket_count -1 column.name.delimiter , columns customer @@ -334,7 +334,7 @@ STAGE PLANS: partition values: dt 2001-01-03 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"customer":"true"}} bucket_count -1 column.name.delimiter , columns 
customer diff --git ql/src/test/results/clientpositive/llap/partition_shared_scan.q.out ql/src/test/results/clientpositive/llap/partition_shared_scan.q.out index b8cf9cb881..9c88e3c4c7 100644 --- ql/src/test/results/clientpositive/llap/partition_shared_scan.q.out +++ ql/src/test/results/clientpositive/llap/partition_shared_scan.q.out @@ -79,38 +79,38 @@ STAGE PLANS: Map Operator Tree: TableScan alias: f1 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: i is not null (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: i (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 910 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 910 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 5 Map Operator Tree: TableScan alias: f2 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: i is not null (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: i (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 910 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 910 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -123,12 +123,12 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 28 Data size: 17703 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 6230 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col9 (type: int) sort order: + Map-reduce partition columns: _col9 (type: int) - Statistics: Num rows: 28 Data size: 17703 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 6230 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Reducer 3 Execution mode: llap @@ -140,14 +140,14 @@ STAGE PLANS: 0 _col9 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col11 - Statistics: Num rows: 30 Data size: 19473 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 62700 
Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col9 (type: int), 'foo' (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col11 (type: int), 'bar' (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 30 Data size: 19473 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 80100 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 30 Data size: 19473 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 80100 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -210,24 +210,24 @@ STAGE PLANS: Map Operator Tree: TableScan alias: f1 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: i is not null (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: i (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 910 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 910 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 910 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -240,12 +240,12 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 28 Data size: 17703 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 6230 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col9 (type: int) sort order: + Map-reduce partition columns: _col9 (type: int) - Statistics: Num rows: 28 Data size: 17703 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 6230 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Reducer 3 Execution mode: llap @@ -257,14 +257,14 @@ STAGE PLANS: 0 _col9 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col11 - Statistics: Num rows: 30 Data size: 19473 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 62700 Basic stats: COMPLETE Column stats: COMPLETE Select 
Operator expressions: _col9 (type: int), 'foo' (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col11 (type: int), 'foo' (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 30 Data size: 19473 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 80100 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 30 Data size: 19473 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 80100 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/ppd_union_view.q.out ql/src/test/results/clientpositive/llap/ppd_union_view.q.out index 52f2ce556b..b80286b1f0 100644 --- ql/src/test/results/clientpositive/llap/ppd_union_view.q.out +++ ql/src/test/results/clientpositive/llap/ppd_union_view.q.out @@ -197,22 +197,22 @@ STAGE PLANS: alias: t1_old properties: insideView TRUE - Statistics: Num rows: 1 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 181 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false predicate: keymap is not null (type: boolean) - Statistics: Num rows: 1 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 181 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: keymap (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 275 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 275 Basic stats: COMPLETE Column stats: COMPLETE tag: 0 value expressions: _col1 (type: string) auto parallelism: true @@ -229,7 +229,7 @@ STAGE PLANS: partition values: ds 2011-10-13 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"keymap":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns keymap,value @@ -278,22 +278,22 @@ STAGE PLANS: alias: t1_mapping properties: insideView TRUE - Statistics: Num rows: 1 Data size: 380 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 179 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false predicate: keymap is not null (type: boolean) - Statistics: Num rows: 1 Data size: 380 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 179 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), keymap (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 380 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 273 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string) null sort 
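-- The partition-property change running through partition_pruning.q.out and
-- ppd_union_view.q.out (COLUMN_STATS_ACCURATE growing a COLUMN_STATS map) can
-- be checked from a session as well. A sketch against the stock srcpart and
-- src test tables; any table written with autogather enabled behaves the same:
DESCRIBE FORMATTED srcpart PARTITION (ds='2008-04-08', hr='11');
-- Partition Parameters should now include COLUMN_STATS_ACCURATE with
-- per-column "true" entries.
DESCRIBE FORMATTED src key;
-- Shows the persisted column statistics (distinct count, null count, average
-- and max length) that the compute_stats(..., 'hll') pipeline wrote.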
order: a sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 1 Data size: 380 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 273 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 value expressions: _col0 (type: string) auto parallelism: true @@ -310,7 +310,7 @@ STAGE PLANS: partition values: ds 2011-10-13 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","keymap":"true"}} bucket_count -1 column.name.delimiter , columns key,keymap @@ -365,11 +365,11 @@ STAGE PLANS: 1 _col1 (type: string) outputColumnNames: _col1, _col3 Position of Big Table: 0 - Statistics: Num rows: 1 Data size: 420 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col3 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 420 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), '2011-10-13' (type: string) outputColumnNames: _col0, _col1, _col2 @@ -466,12 +466,12 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1_new - Statistics: Num rows: 1 Data size: 379 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 379 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), '2011-10-15' (type: string) outputColumnNames: _col0, _col1, _col2 @@ -511,7 +511,7 @@ STAGE PLANS: partition values: ds 2011-10-15 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git ql/src/test/results/clientpositive/llap/ptf.q.out ql/src/test/results/clientpositive/llap/ptf.q.out index 4f79e260ae..8836a424ff 100644 --- ql/src/test/results/clientpositive/llap/ptf.q.out +++ ql/src/test/results/clientpositive/llap/ptf.q.out @@ -2966,8 +2966,10 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 2 (SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Reducer 2 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (SIMPLE_EDGE) + Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -3069,9 +3071,37 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.part_4 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: double) + outputColumnNames: p_mfgr, p_name, p_size, r, dr, s + Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(p_mfgr, 'hll'), compute_stats(p_name, 'hll'), compute_stats(p_size, 'hll'), compute_stats(r, 'hll'), compute_stats(dr, 
'hll'), compute_stats(s, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 2576 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2576 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) Reducer 4 Execution mode: llap Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: llap + Reduce Operator Tree: Select Operator expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) outputColumnNames: _col1, _col2, _col5 @@ -3106,7 +3136,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE value expressions: sum_window_0 (type: bigint), _col5 (type: int) - Reducer 5 + Reducer 6 Execution mode: llap Reduce Operator Tree: Select Operator @@ -3166,6 +3196,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.part_5 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: double), _col7 (type: int) + outputColumnNames: p_mfgr, p_name, p_size, s2, r, dr, cud, fv1 + Statistics: Num rows: 26 Data size: 6422 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(p_mfgr, 'hll'), compute_stats(p_name, 'hll'), compute_stats(p_size, 'hll'), compute_stats(s2, 'hll'), compute_stats(r, 'hll'), compute_stats(dr, 'hll'), compute_stats(cud, 'hll'), compute_stats(fv1, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 3424 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 3424 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6), compute_stats(VALUE._col7) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 3520 Basic stats: COMPLETE Column 
stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 3520 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -3183,6 +3241,10 @@ STAGE PLANS: Stage: Stage-4 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: p_mfgr, p_name, p_size, r, dr, s + Column Types: string, string, int, int, int, double + Table: default.part_4 Stage: Stage-1 Move Operator @@ -3197,6 +3259,10 @@ STAGE PLANS: Stage: Stage-5 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: p_mfgr, p_name, p_size, s2, r, dr, cud, fv1 + Column Types: string, string, int, int, int, int, double, int + Table: default.part_5 PREHOOK: query: from noop(on part partition by p_mfgr diff --git ql/src/test/results/clientpositive/llap/rcfile_createas1.q.out ql/src/test/results/clientpositive/llap/rcfile_createas1.q.out index 066295f090..36b8c474d7 100644 --- ql/src/test/results/clientpositive/llap/rcfile_createas1.q.out +++ ql/src/test/results/clientpositive/llap/rcfile_createas1.q.out @@ -68,14 +68,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: rcfile_createas1a - Statistics: Num rows: 1000 Data size: 189412 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string), (hash(key) pmod 50) (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1000 Data size: 189412 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 99000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1000 Data size: 189412 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 99000 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/rcfile_merge2.q.out ql/src/test/results/clientpositive/llap/rcfile_merge2.q.out index e2ecc76948..4ff5569a7c 100644 --- ql/src/test/results/clientpositive/llap/rcfile_merge2.q.out +++ ql/src/test/results/clientpositive/llap/rcfile_merge2.q.out @@ -34,6 +34,9 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -52,8 +55,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.rcfile_merge2a + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string), UDFToString(_col3) (type: string) + outputColumnNames: key, value, one, two, three + Statistics: Num rows: 500 Data size: 274000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: one (type: string), two (type: string), three (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 500 Data size: 658500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), 
_col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 500 Data size: 658500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col3 (type: struct), _col4 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 500 Data size: 666500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col3 (type: struct), _col4 (type: struct), _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 500 Data size: 666500 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 666500 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -75,6 +114,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.rcfile_merge2a PREHOOK: query: INSERT OVERWRITE TABLE rcfile_merge2a PARTITION (one='1', two, three) SELECT key, value, PMOD(HASH(key), 10) as two, diff --git ql/src/test/results/clientpositive/llap/rcfile_merge3.q.out ql/src/test/results/clientpositive/llap/rcfile_merge3.q.out index 02ee0e0181..575be85eed 100644 --- ql/src/test/results/clientpositive/llap/rcfile_merge3.q.out +++ ql/src/test/results/clientpositive/llap/rcfile_merge3.q.out @@ -64,26 +64,57 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: rcfile_merge3a - Statistics: Num rows: 1000 Data size: 189412 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 189412 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1000 Data size: 189412 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.rcfile_merge3b + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic 
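-- For dynamic-partition writes (rcfile_merge2.q.out just above, and the
-- reduce_deduplicate.q.out hunks further down where Stats Work reports
-- "Is Table Level Stats: false"), the same branch keys compute_stats by the
-- partition columns so the statistics land per partition. A sketch with
-- hypothetical table names:
SET hive.exec.dynamic.partition.mode=nonstrict;
EXPLAIN
INSERT OVERWRITE TABLE dest_part PARTITION (ds)
SELECT key, value, ds FROM src_part_demo;
-- The Group By Operator should carry "keys: ds" next to the compute_stats
-- aggregations, mirroring the one/two/three keys in rcfile_merge2.q.out.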
stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -101,6 +132,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.rcfile_merge3b PREHOOK: query: INSERT OVERWRITE TABLE rcfile_merge3b SELECT key, value FROM rcfile_merge3a diff --git ql/src/test/results/clientpositive/llap/rcfile_merge4.q.out ql/src/test/results/clientpositive/llap/rcfile_merge4.q.out index 52c97d11d5..f22e01a536 100644 --- ql/src/test/results/clientpositive/llap/rcfile_merge4.q.out +++ ql/src/test/results/clientpositive/llap/rcfile_merge4.q.out @@ -64,26 +64,57 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: rcfile_merge3a - Statistics: Num rows: 1000 Data size: 188412 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 188412 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1000 Data size: 188412 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.rcfile_merge3b + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: 
COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -101,6 +132,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.rcfile_merge3b PREHOOK: query: INSERT OVERWRITE TABLE rcfile_merge3b SELECT key, value FROM rcfile_merge3a diff --git ql/src/test/results/clientpositive/llap/reduce_deduplicate.q.out ql/src/test/results/clientpositive/llap/reduce_deduplicate.q.out index 7acd942d58..fd8cc2026f 100644 --- ql/src/test/results/clientpositive/llap/reduce_deduplicate.q.out +++ ql/src/test/results/clientpositive/llap/reduce_deduplicate.q.out @@ -141,6 +141,41 @@ STAGE PLANS: TotalFiles: 2 GatherStats: true MultiFileSpray: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -178,6 +213,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.bucket5_1 + Is Table Level Stats: true PREHOOK: query: insert overwrite table bucket5_1 select * from src cluster by key @@ -269,6 +309,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -357,6 +398,61 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: bigint), _col5 (type: string), _col6 (type: string), '2010-03-29' (type: string) + outputColumnNames: aid, bid, t, ctime, etime, l, et, ds + Statistics: Num rows: 1 Data size: 462 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(aid, 'hll'), 
compute_stats(bid, 'hll'), compute_stats(t, 'hll'), compute_stats(ctime, 'hll'), compute_stats(etime, 'hll'), compute_stats(l, 'hll'), compute_stats(et, 'hll') + keys: '2010-03-29' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 3142 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: '2010-03-29' (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: '2010-03-29' (type: string) + Statistics: Num rows: 1 Data size: 3142 Basic stats: COMPLETE Column stats: PARTIAL + tag: -1 + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) + auto parallelism: true + Reducer 3 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6) + keys: '2010-03-29' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 3174 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), '2010-03-29' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 3174 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 3174 Basic stats: COMPLETE Column stats: PARTIAL +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7 + columns.types struct:struct:struct:struct:struct:struct:struct:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -392,4 +488,9 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: aid, bid, t, ctime, etime, l, et + Column Types: string, string, int, string, bigint, string, string + Table: default.complex_tbl_1 + Is Table Level Stats: false diff --git ql/src/test/results/clientpositive/llap/reduce_deduplicate_distinct.q.out ql/src/test/results/clientpositive/llap/reduce_deduplicate_distinct.q.out index 6864da48cd..2955ee67b2 100644 --- ql/src/test/results/clientpositive/llap/reduce_deduplicate_distinct.q.out +++ ql/src/test/results/clientpositive/llap/reduce_deduplicate_distinct.q.out @@ -39,22 +39,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: count_distinct_test - Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE Select 
Operator expressions: id (type: int), key (type: int), name (type: int) outputColumnNames: id, key, name - Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(DISTINCT key), count(DISTINCT name) keys: id (type: int), key (type: int), name (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) sort order: +++ Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -65,10 +65,10 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -118,21 +118,21 @@ STAGE PLANS: Map Operator Tree: TableScan alias: count_distinct_test - Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: id (type: int), key (type: int), name (type: int) outputColumnNames: id, key, name - Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: id (type: int), key (type: int), name (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) sort order: +++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) - Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -142,18 +142,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(DISTINCT _col1), count(DISTINCT _col2) keys: _col0 (type: int), _col1 (type: int), _col2 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 28 Basic stats: 
COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) sort order: +++ Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -162,10 +162,10 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -215,21 +215,21 @@ STAGE PLANS: Map Operator Tree: TableScan alias: count_distinct_test - Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: id (type: int), key (type: int), name (type: int) outputColumnNames: id, key, name - Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: id (type: int), key (type: int), name (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) sort order: +++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) - Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -239,18 +239,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(DISTINCT _col2), count(DISTINCT _col1) keys: _col0 (type: int), _col2 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) sort order: +++ Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -259,10 +259,10 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE 
Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -312,21 +312,21 @@ STAGE PLANS: Map Operator Tree: TableScan alias: count_distinct_test - Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: id (type: int), key (type: int), name (type: int) outputColumnNames: id, key, name - Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: id (type: int), key (type: int), name (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) sort order: +++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) - Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -336,18 +336,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(DISTINCT _col1), count(DISTINCT _col2) keys: _col0 (type: int), _col1 (type: int), _col2 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) sort order: +++ Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -356,10 +356,10 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -409,21 +409,21 @@ STAGE PLANS: Map Operator Tree: TableScan alias: count_distinct_test - Statistics: Num rows: 5 Data size: 60 Basic stats: 
COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: id (type: int), key (type: int), name (type: int) outputColumnNames: id, key, name - Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: id (type: int), key (type: int), name (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) sort order: +++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) - Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -433,18 +433,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(DISTINCT _col2), count(DISTINCT _col1) keys: _col0 (type: int), _col2 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) sort order: +++ Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -453,10 +453,10 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/sample1.q.out ql/src/test/results/clientpositive/llap/sample1.q.out index 5b69bb5832..3458ee28d0 100644 --- ql/src/test/results/clientpositive/llap/sample1.q.out +++ ql/src/test/results/clientpositive/llap/sample1.q.out @@ -26,6 +26,9 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -73,6 +76,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: key, value, dt, hr + Statistics: 
Num rows: 250 Data size: 68750 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(dt, 'hll'), compute_stats(hr, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1744 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1744 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Execution mode: llap LLAP IO: no inputs Path -> Alias: @@ -128,6 +147,37 @@ STAGE PLANS: name: default.srcpart Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [s] + Reducer 2 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -164,6 +214,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value, dt, hr + Column Types: int, string, string, string + Table: default.dest1 + Is Table Level Stats: true PREHOOK: query: INSERT OVERWRITE TABLE dest1 SELECT s.* FROM srcpart TABLESAMPLE (BUCKET 1 OUT OF 1 ON rand()) s diff --git ql/src/test/results/clientpositive/llap/sample10.q.out ql/src/test/results/clientpositive/llap/sample10.q.out index f69dc3d031..8552a3a982 100644 --- ql/src/test/results/clientpositive/llap/sample10.q.out +++ ql/src/test/results/clientpositive/llap/sample10.q.out @@ -56,29 +56,29 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpartbucket - Statistics: Num rows: 40 Data size: 14776 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 40 Data size: 10760 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: true predicate: (((hash(key) & 2147483647) % 4) = 0) (type: boolean) sampleDesc: BUCKET 1 OUT OF 4 - Statistics: Num rows: 20 Data size: 7360 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 5380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ds (type: string) outputColumnNames: ds - Statistics: Num rows: 20 Data size: 7360 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 5380 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator 
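
The Group By Operators that these plans gain all funnel into the compute_stats UDAF with the 'hll' flag, producing per-column summaries (min/max, null count, average length) plus a HyperLogLog sketch for the distinct-value estimate that the mergepartial reducers combine. As a hedged sketch, compute_stats is a registered Hive function, so against the dest1 table populated by sample1.q above it should be possible to invoke it directly and see the struct the reducers merge:

    -- Sketch, assuming the dest1 table from sample1.q; compute_stats is the
    -- same UDAF the autogathered plans insert, and 'hll' selects the
    -- HyperLogLog NDV estimator visible in the Group By Operators above.
    SELECT compute_stats(key, 'hll') FROM dest1;
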
aggregations: count(1) keys: ds (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: _col1 (type: bigint) auto parallelism: true @@ -96,7 +96,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 4 bucket_field_name key column.name.delimiter , @@ -146,7 +146,7 @@ STAGE PLANS: ds 2008-04-08 hr 12 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 4 bucket_field_name key column.name.delimiter , @@ -196,7 +196,7 @@ STAGE PLANS: ds 2008-04-09 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 4 bucket_field_name key column.name.delimiter , @@ -246,7 +246,7 @@ STAGE PLANS: ds 2008-04-09 hr 12 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 4 bucket_field_name key column.name.delimiter , @@ -301,12 +301,12 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: a sort order: + - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: _col1 (type: bigint) auto parallelism: false @@ -317,13 +317,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/schema_evol_orc_nonvec_part_all_complex.q.out ql/src/test/results/clientpositive/llap/schema_evol_orc_nonvec_part_all_complex.q.out index 76ea04312e..e86439b562 100644 --- ql/src/test/results/clientpositive/llap/schema_evol_orc_nonvec_part_all_complex.q.out +++ ql/src/test/results/clientpositive/llap/schema_evol_orc_nonvec_part_all_complex.q.out @@ -431,14 +431,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_add_various_various_struct2 
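
The srcpartbucket hunks above show the other half of the bookkeeping: COLUMN_STATS_ACCURATE grows a COLUMN_STATS map recording, per partition, which columns carry trustworthy statistics. A minimal way to inspect that marker, assuming the sample10.q partitions, is to describe a single partition and read the property out of the detailed parameters:

    -- Sketch: DESCRIBE FORMATTED on one srcpartbucket partition surfaces
    -- COLUMN_STATS_ACCURATE among the partition parameters.
    DESCRIBE FORMATTED srcpartbucket PARTITION (ds='2008-04-08', hr='11');
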
- Statistics: Num rows: 8 Data size: 26640 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 800 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: insert_num (type: int), part (type: int), b (type: string), s2 (type: struct) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 8 Data size: 21760 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 800 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 8 Data size: 21760 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 800 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/schema_evol_orc_nonvec_table.q.out ql/src/test/results/clientpositive/llap/schema_evol_orc_nonvec_table.q.out index c97dd65f1a..c4382ddf90 100644 --- ql/src/test/results/clientpositive/llap/schema_evol_orc_nonvec_table.q.out +++ ql/src/test/results/clientpositive/llap/schema_evol_orc_nonvec_table.q.out @@ -79,14 +79,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: table_add_int_permute_select - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: insert_num (type: int), a (type: int), b (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -216,14 +216,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: table_add_int_string_permute_select - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: insert_num (type: int), a (type: int), b (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -415,14 +415,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: table_change_string_group_double - Statistics: Num rows: 6 Data size: 1272 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 720 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: insert_num (type: int), c1 (type: double), c2 (type: double), c3 (type: double), b (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 6 Data size: 1272 Basic stats: COMPLETE 
Column stats: NONE + Statistics: Num rows: 6 Data size: 720 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 1272 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 720 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -683,14 +683,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: table_change_numeric_group_string_group_multi_ints_string_group - Statistics: Num rows: 6 Data size: 16248 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 15696 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: insert_num (type: int), c1 (type: string), c2 (type: string), c3 (type: string), c4 (type: string), c5 (type: char(50)), c6 (type: char(50)), c7 (type: char(50)), c8 (type: char(50)), c9 (type: char(5)), c10 (type: char(5)), c11 (type: char(5)), c12 (type: char(5)), c13 (type: varchar(50)), c14 (type: varchar(50)), c15 (type: varchar(50)), c16 (type: varchar(50)), c17 (type: varchar(5)), c18 (type: varchar(5)), c19 (type: varchar(5)), c20 (type: varchar(5)), b (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 - Statistics: Num rows: 6 Data size: 16248 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 15696 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 16248 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 15696 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -862,14 +862,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: table_change_numeric_group_string_group_floating_string_group - Statistics: Num rows: 6 Data size: 12540 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 10884 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: insert_num (type: int), c1 (type: string), c2 (type: string), c3 (type: string), c4 (type: char(50)), c5 (type: char(50)), c6 (type: char(50)), c7 (type: char(7)), c8 (type: char(7)), c9 (type: char(7)), c10 (type: varchar(50)), c11 (type: varchar(50)), c12 (type: varchar(50)), c13 (type: varchar(7)), c14 (type: varchar(7)), c15 (type: varchar(7)), b (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 - Statistics: Num rows: 6 Data size: 12540 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 10884 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 12540 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 10884 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part_all_complex.q.out ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part_all_complex.q.out index 163ed4b744..861d7f453f 
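
The schema-evolution tables above land on Column stats: PARTIAL rather than COMPLETE because an ALTER TABLE ... CHANGE invalidates the statistics of the retyped columns while leaving the untouched ones intact. A sketch of the manual repair, using one of the table names from these hunks, is a full column re-analysis, after which the planner should report COMPLETE again:

    -- Assumption: stats for the retyped c1..c3 double columns were dropped
    -- by the type change; re-gathering them restores COMPLETE column stats.
    ANALYZE TABLE table_change_string_group_double COMPUTE STATISTICS FOR COLUMNS;
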
100644 --- ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part_all_complex.q.out +++ ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part_all_complex.q.out @@ -457,7 +457,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_add_various_various_struct2 - Statistics: Num rows: 8 Data size: 26640 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 800 Basic stats: COMPLETE Column stats: PARTIAL TableScan Vectorization: native: true vectorizationSchemaColumns: [0:insert_num:int, 1:b:string, 2:s2:struct, 3:part:int, 4:ROW__ID:struct] @@ -468,13 +468,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 3, 1, 2] - Statistics: Num rows: 8 Data size: 21760 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 800 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 8 Data size: 21760 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 800 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_table.q.out ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_table.q.out index 7bf61b5514..da367cf3c0 100644 --- ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_table.q.out +++ ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_table.q.out @@ -79,7 +79,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: table_add_int_permute_select - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:insert_num:int, 1:a:int, 2:b:string, 3:c:int, 4:ROW__ID:struct] @@ -90,13 +90,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2] - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -241,7 +241,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: table_add_int_string_permute_select - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:insert_num:int, 1:a:int, 2:b:string, 3:c:int, 4:d:string, 5:ROW__ID:struct] @@ -252,13 +252,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2] - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: 
VectorFileSinkOperator native: false - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -465,7 +465,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: table_change_string_group_double - Statistics: Num rows: 6 Data size: 1272 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 720 Basic stats: COMPLETE Column stats: PARTIAL TableScan Vectorization: native: true vectorizationSchemaColumns: [0:insert_num:int, 1:c1:double, 2:c2:double, 3:c3:double, 4:b:string, 5:ROW__ID:struct] @@ -476,13 +476,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4] - Statistics: Num rows: 6 Data size: 1272 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 720 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 6 Data size: 1272 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 720 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -758,7 +758,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: table_change_numeric_group_string_group_multi_ints_string_group - Statistics: Num rows: 6 Data size: 16248 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 15696 Basic stats: COMPLETE Column stats: PARTIAL TableScan Vectorization: native: true vectorizationSchemaColumns: [0:insert_num:int, 1:c1:string, 2:c2:string, 3:c3:string, 4:c4:string, 5:c5:char(50), 6:c6:char(50), 7:c7:char(50), 8:c8:char(50), 9:c9:char(5), 10:c10:char(5), 11:c11:char(5), 12:c12:char(5), 13:c13:varchar(50), 14:c14:varchar(50), 15:c15:varchar(50), 16:c16:varchar(50), 17:c17:varchar(5), 18:c18:varchar(5), 19:c19:varchar(5), 20:c20:varchar(5), 21:b:string, 22:ROW__ID:struct] @@ -769,13 +769,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21] - Statistics: Num rows: 6 Data size: 16248 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 15696 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 6 Data size: 16248 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 15696 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -962,7 +962,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: table_change_numeric_group_string_group_floating_string_group - Statistics: Num rows: 6 Data size: 12540 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 10884 Basic stats: COMPLETE Column stats: PARTIAL TableScan Vectorization: native: true vectorizationSchemaColumns: [0:insert_num:int, 1:c1:string, 2:c2:string, 3:c3:string, 4:c4:char(50), 5:c5:char(50), 6:c6:char(50), 7:c7:char(7), 8:c8:char(7), 9:c9:char(7), 10:c10:varchar(50), 
11:c11:varchar(50), 12:c12:varchar(50), 13:c13:varchar(7), 14:c14:varchar(7), 15:c15:varchar(7), 16:b:string, 17:ROW__ID:struct] @@ -973,13 +973,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] - Statistics: Num rows: 6 Data size: 12540 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 10884 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 6 Data size: 12540 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 10884 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/schema_evol_text_nonvec_part_all_complex.q.out ql/src/test/results/clientpositive/llap/schema_evol_text_nonvec_part_all_complex.q.out index 94a6995ff3..22e413f2dc 100644 --- ql/src/test/results/clientpositive/llap/schema_evol_text_nonvec_part_all_complex.q.out +++ ql/src/test/results/clientpositive/llap/schema_evol_text_nonvec_part_all_complex.q.out @@ -431,14 +431,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_add_various_various_struct2 - Statistics: Num rows: 8 Data size: 22667 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 800 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: insert_num (type: int), part (type: int), b (type: string), s2 (type: struct) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 8 Data size: 21760 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 800 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 8 Data size: 21760 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 800 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/schema_evol_text_nonvec_table.q.out ql/src/test/results/clientpositive/llap/schema_evol_text_nonvec_table.q.out index 6c5fcf29a3..cf7013005d 100644 --- ql/src/test/results/clientpositive/llap/schema_evol_text_nonvec_table.q.out +++ ql/src/test/results/clientpositive/llap/schema_evol_text_nonvec_table.q.out @@ -79,14 +79,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: table_add_int_permute_select - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: insert_num (type: int), a (type: int), b (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -216,14 +216,14 @@ STAGE PLANS: 
Map Operator Tree: TableScan alias: table_add_int_string_permute_select - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: insert_num (type: int), a (type: int), b (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -415,14 +415,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: table_change_string_group_double - Statistics: Num rows: 6 Data size: 1272 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 720 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: insert_num (type: int), c1 (type: double), c2 (type: double), c3 (type: double), b (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 6 Data size: 1272 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 720 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 1272 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 720 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -683,14 +683,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: table_change_numeric_group_string_group_multi_ints_string_group - Statistics: Num rows: 6 Data size: 16248 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 15696 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: insert_num (type: int), c1 (type: string), c2 (type: string), c3 (type: string), c4 (type: string), c5 (type: char(50)), c6 (type: char(50)), c7 (type: char(50)), c8 (type: char(50)), c9 (type: char(5)), c10 (type: char(5)), c11 (type: char(5)), c12 (type: char(5)), c13 (type: varchar(50)), c14 (type: varchar(50)), c15 (type: varchar(50)), c16 (type: varchar(50)), c17 (type: varchar(5)), c18 (type: varchar(5)), c19 (type: varchar(5)), c20 (type: varchar(5)), b (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 - Statistics: Num rows: 6 Data size: 16248 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 15696 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 16248 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 15696 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -862,14 +862,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: table_change_numeric_group_string_group_floating_string_group - Statistics: Num rows: 6 Data size: 12540 
Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 10884 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: insert_num (type: int), c1 (type: string), c2 (type: string), c3 (type: string), c4 (type: char(50)), c5 (type: char(50)), c6 (type: char(50)), c7 (type: char(7)), c8 (type: char(7)), c9 (type: char(7)), c10 (type: varchar(50)), c11 (type: varchar(50)), c12 (type: varchar(50)), c13 (type: varchar(7)), c14 (type: varchar(7)), c15 (type: varchar(7)), b (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 - Statistics: Num rows: 6 Data size: 12540 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 10884 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 12540 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 10884 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part_all_complex.q.out ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part_all_complex.q.out index b23add1ab4..2fa7bf6ef9 100644 --- ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part_all_complex.q.out +++ ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part_all_complex.q.out @@ -457,7 +457,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_add_various_various_struct2 - Statistics: Num rows: 8 Data size: 22667 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 800 Basic stats: COMPLETE Column stats: PARTIAL TableScan Vectorization: native: true vectorizationSchemaColumns: [0:insert_num:int, 1:b:string, 2:s2:struct, 3:part:int, 4:ROW__ID:struct] @@ -468,13 +468,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 3, 1, 2] - Statistics: Num rows: 8 Data size: 21760 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 800 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 8 Data size: 21760 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 800 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/schema_evol_text_vec_table.q.out ql/src/test/results/clientpositive/llap/schema_evol_text_vec_table.q.out index e523d33ec6..a472bc7b04 100644 --- ql/src/test/results/clientpositive/llap/schema_evol_text_vec_table.q.out +++ ql/src/test/results/clientpositive/llap/schema_evol_text_vec_table.q.out @@ -79,7 +79,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: table_add_int_permute_select - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:insert_num:int, 1:a:int, 2:b:string, 3:c:int, 4:ROW__ID:struct] @@ -90,13 +90,13 @@ STAGE PLANS: className: VectorSelectOperator native: true 
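
For the vectorized variants the story is identical: only the Statistics annotations move, while the TableScan Vectorization and VectorSelectOperator details are untouched. To see which columns of an evolved table such as table_change_string_group_double actually carry statistics, and hence why the planner settles for PARTIAL, the per-column metadata can be pulled directly, as a sketch:

    -- Columns that were analyzed (or autogathered) report min/max/ndv
    -- fields here; columns invalidated by schema evolution come back empty.
    DESCRIBE FORMATTED table_change_string_group_double c1;
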
projectedOutputColumnNums: [0, 1, 2] - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -241,7 +241,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: table_add_int_string_permute_select - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:insert_num:int, 1:a:int, 2:b:string, 3:c:int, 4:d:string, 5:ROW__ID:struct] @@ -252,13 +252,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2] - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -465,7 +465,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: table_change_string_group_double - Statistics: Num rows: 6 Data size: 1272 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 720 Basic stats: COMPLETE Column stats: PARTIAL TableScan Vectorization: native: true vectorizationSchemaColumns: [0:insert_num:int, 1:c1:double, 2:c2:double, 3:c3:double, 4:b:string, 5:ROW__ID:struct] @@ -476,13 +476,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4] - Statistics: Num rows: 6 Data size: 1272 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 720 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 6 Data size: 1272 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 720 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -758,7 +758,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: table_change_numeric_group_string_group_multi_ints_string_group - Statistics: Num rows: 6 Data size: 16248 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 15696 Basic stats: COMPLETE Column stats: PARTIAL TableScan Vectorization: native: true vectorizationSchemaColumns: [0:insert_num:int, 1:c1:string, 2:c2:string, 3:c3:string, 4:c4:string, 5:c5:char(50), 6:c6:char(50), 7:c7:char(50), 8:c8:char(50), 9:c9:char(5), 10:c10:char(5), 11:c11:char(5), 12:c12:char(5), 13:c13:varchar(50), 14:c14:varchar(50), 15:c15:varchar(50), 16:c16:varchar(50), 
17:c17:varchar(5), 18:c18:varchar(5), 19:c19:varchar(5), 20:c20:varchar(5), 21:b:string, 22:ROW__ID:struct] @@ -769,13 +769,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21] - Statistics: Num rows: 6 Data size: 16248 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 15696 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 6 Data size: 16248 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 15696 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -962,7 +962,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: table_change_numeric_group_string_group_floating_string_group - Statistics: Num rows: 6 Data size: 12540 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 10884 Basic stats: COMPLETE Column stats: PARTIAL TableScan Vectorization: native: true vectorizationSchemaColumns: [0:insert_num:int, 1:c1:string, 2:c2:string, 3:c3:string, 4:c4:char(50), 5:c5:char(50), 6:c6:char(50), 7:c7:char(7), 8:c8:char(7), 9:c9:char(7), 10:c10:varchar(50), 11:c11:varchar(50), 12:c12:varchar(50), 13:c13:varchar(7), 14:c14:varchar(7), 15:c15:varchar(7), 16:b:string, 17:ROW__ID:struct] @@ -973,13 +973,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] - Statistics: Num rows: 6 Data size: 12540 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 10884 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 6 Data size: 12540 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 10884 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_part_all_complex.q.out ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_part_all_complex.q.out index 4ca61f6379..c6f3bdcc4c 100644 --- ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_part_all_complex.q.out +++ ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_part_all_complex.q.out @@ -457,7 +457,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_add_various_various_struct2 - Statistics: Num rows: 8 Data size: 22667 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 800 Basic stats: COMPLETE Column stats: PARTIAL TableScan Vectorization: native: true vectorizationSchemaColumns: [0:insert_num:int, 1:b:string, 2:s2:struct, 3:part:int, 4:ROW__ID:struct] @@ -468,13 +468,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 3, 1, 2] - Statistics: Num rows: 8 Data size: 21760 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 800 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num 
rows: 8 Data size: 21760 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 800 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_table.q.out ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_table.q.out index 4d4304e868..a601977aca 100644 --- ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_table.q.out +++ ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_table.q.out @@ -79,7 +79,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: table_add_int_permute_select - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:insert_num:int, 1:a:int, 2:b:string, 3:c:int, 4:ROW__ID:struct] @@ -90,13 +90,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2] - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -241,7 +241,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: table_add_int_string_permute_select - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:insert_num:int, 1:a:int, 2:b:string, 3:c:int, 4:d:string, 5:ROW__ID:struct] @@ -252,13 +252,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2] - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -465,7 +465,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: table_change_string_group_double - Statistics: Num rows: 6 Data size: 1272 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 720 Basic stats: COMPLETE Column stats: PARTIAL TableScan Vectorization: native: true vectorizationSchemaColumns: [0:insert_num:int, 1:c1:double, 2:c2:double, 3:c3:double, 4:b:string, 5:ROW__ID:struct] @@ -476,13 +476,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4] - Statistics: Num rows: 6 Data size: 1272 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data 
size: 720 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 6 Data size: 1272 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 720 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -758,7 +758,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: table_change_numeric_group_string_group_multi_ints_string_group - Statistics: Num rows: 6 Data size: 16248 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 15696 Basic stats: COMPLETE Column stats: PARTIAL TableScan Vectorization: native: true vectorizationSchemaColumns: [0:insert_num:int, 1:c1:string, 2:c2:string, 3:c3:string, 4:c4:string, 5:c5:char(50), 6:c6:char(50), 7:c7:char(50), 8:c8:char(50), 9:c9:char(5), 10:c10:char(5), 11:c11:char(5), 12:c12:char(5), 13:c13:varchar(50), 14:c14:varchar(50), 15:c15:varchar(50), 16:c16:varchar(50), 17:c17:varchar(5), 18:c18:varchar(5), 19:c19:varchar(5), 20:c20:varchar(5), 21:b:string, 22:ROW__ID:struct] @@ -769,13 +769,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21] - Statistics: Num rows: 6 Data size: 16248 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 15696 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 6 Data size: 16248 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 15696 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -962,7 +962,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: table_change_numeric_group_string_group_floating_string_group - Statistics: Num rows: 6 Data size: 12540 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 10884 Basic stats: COMPLETE Column stats: PARTIAL TableScan Vectorization: native: true vectorizationSchemaColumns: [0:insert_num:int, 1:c1:string, 2:c2:string, 3:c3:string, 4:c4:char(50), 5:c5:char(50), 6:c6:char(50), 7:c7:char(7), 8:c8:char(7), 9:c9:char(7), 10:c10:varchar(50), 11:c11:varchar(50), 12:c12:varchar(50), 13:c13:varchar(7), 14:c14:varchar(7), 15:c15:varchar(7), 16:b:string, 17:ROW__ID:struct] @@ -973,13 +973,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] - Statistics: Num rows: 6 Data size: 12540 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 10884 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 6 Data size: 12540 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 10884 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/semijoin6.q.out 
ql/src/test/results/clientpositive/llap/semijoin6.q.out index 31d2ebbf78..523fc54450 100644 --- ql/src/test/results/clientpositive/llap/semijoin6.q.out +++ ql/src/test/results/clientpositive/llap/semijoin6.q.out @@ -52,19 +52,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: u - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: a is not null (type: boolean) - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: a (type: int), b (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int) Execution mode: llap LLAP IO: no inputs @@ -72,24 +72,24 @@ STAGE PLANS: Map Operator Tree: TableScan alias: v - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: a is not null (type: boolean) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: a (type: int) outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -102,10 +102,10 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -152,12 +152,12 @@ STAGE PLANS: Map Operator Tree: TableScan alias: u - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: b (type: int) sort order: + Map-reduce partition columns: b (type: int) - Statistics: Num 
rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE value expressions: a (type: int) Execution mode: llap LLAP IO: no inputs @@ -165,21 +165,21 @@ STAGE PLANS: Map Operator Tree: TableScan alias: v - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: b (type: int) outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -193,10 +193,10 @@ STAGE PLANS: 1 _col0 (type: int) nullSafes: [true] outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -245,14 +245,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: u - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: a (type: int), b (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int) Execution mode: llap LLAP IO: no inputs @@ -260,19 +260,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: v - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: b (type: int) outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 
12 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Execution mode: llap LLAP IO: no inputs @@ -287,14 +287,14 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2 residual filter predicates: {(_col1 <> _col2)} - Statistics: Num rows: 36 Data size: 468 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 212 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 468 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 140 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 36 Data size: 468 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 140 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -344,19 +344,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: u - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: a is not null (type: boolean) - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: a (type: int), b (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int) Execution mode: llap LLAP IO: no inputs @@ -364,24 +364,24 @@ STAGE PLANS: Map Operator Tree: TableScan alias: v - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: a is not null (type: boolean) - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: a (type: int), b (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int) Execution mode: llap LLAP IO: no inputs @@ -396,14 +396,14 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, 
_col1, _col3 residual filter predicates: {(_col1 <> _col3)} - Statistics: Num rows: 6 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -450,14 +450,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: u - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: a (type: int), b (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int) Execution mode: llap LLAP IO: no inputs @@ -465,19 +465,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: v - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: a (type: int), b (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int) Execution mode: llap LLAP IO: no inputs @@ -492,14 +492,14 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3 residual filter predicates: {((_col0 = _col2) or (_col1 <> _col3))} - Statistics: Num rows: 36 Data size: 612 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 284 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 612 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 140 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 36 Data size: 612 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 140 Basic stats: COMPLETE Column stats: COMPLETE table: input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -549,19 +549,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: u - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: a is not null (type: boolean) - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: a (type: int), b (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int) Execution mode: llap LLAP IO: no inputs @@ -569,24 +569,24 @@ STAGE PLANS: Map Operator Tree: TableScan alias: v - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: a is not null (type: boolean) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: a (type: int) outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -599,10 +599,10 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -656,19 +656,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: u - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: a is not null (type: boolean) - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: 
COMPLETE Select Operator expressions: a (type: int), b (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int) Execution mode: llap LLAP IO: no inputs @@ -676,24 +676,24 @@ STAGE PLANS: Map Operator Tree: TableScan alias: v - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: a is not null (type: boolean) - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: a (type: int), b (type: int) outputColumnNames: _col0, _col3 - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: int), _col3 (type: int), _col3 (type: int), _col3 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int) Execution mode: llap LLAP IO: no inputs @@ -708,14 +708,14 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 residual filter predicates: {((_col0 + _col3) > 400)} {(CASE WHEN ((_col0 > 3)) THEN (true) WHEN ((_col3 > 1900)) THEN (true) ELSE (false) END or ((COALESCE(_col0) + COALESCE(_col3)) > 1900))} - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/semijoin7.q.out ql/src/test/results/clientpositive/llap/semijoin7.q.out index e15e3f5264..dedeea655d 100644 --- ql/src/test/results/clientpositive/llap/semijoin7.q.out +++ ql/src/test/results/clientpositive/llap/semijoin7.q.out @@ -52,15 +52,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: u - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data 
size: 44 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: a is not null (type: boolean) - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: a (type: int) sort order: + Map-reduce partition columns: a (type: int) - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE value expressions: b (type: int) Execution mode: llap LLAP IO: no inputs @@ -68,24 +68,24 @@ STAGE PLANS: Map Operator Tree: TableScan alias: v - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: a is not null (type: boolean) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: a (type: int) outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -98,10 +98,10 @@ STAGE PLANS: 0 a (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -148,12 +148,12 @@ STAGE PLANS: Map Operator Tree: TableScan alias: u - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: b (type: int) sort order: + Map-reduce partition columns: b (type: int) - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE value expressions: a (type: int) Execution mode: llap LLAP IO: no inputs @@ -161,21 +161,21 @@ STAGE PLANS: Map Operator Tree: TableScan alias: v - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: b (type: int) outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: 
COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -189,10 +189,10 @@ STAGE PLANS: 1 _col0 (type: int) nullSafes: [true] outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -241,10 +241,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: u - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE value expressions: a (type: int), b (type: int) Execution mode: llap LLAP IO: no inputs @@ -252,19 +252,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: v - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: b (type: int) outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Execution mode: llap LLAP IO: no inputs @@ -279,14 +279,14 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col5 residual filter predicates: {(_col1 <> _col5)} - Statistics: Num rows: 36 Data size: 468 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 212 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 468 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 212 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 36 Data size: 468 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 212 Basic stats: COMPLETE Column stats: COMPLETE table: input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -336,15 +336,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: u - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: a is not null (type: boolean) - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: a (type: int) sort order: + Map-reduce partition columns: a (type: int) - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE value expressions: b (type: int) Execution mode: llap LLAP IO: no inputs @@ -352,24 +352,24 @@ STAGE PLANS: Map Operator Tree: TableScan alias: v - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: a is not null (type: boolean) - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: a (type: int), b (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int) Execution mode: llap LLAP IO: no inputs @@ -384,14 +384,14 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col6 residual filter predicates: {(_col1 <> _col6)} - Statistics: Num rows: 6 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -438,10 +438,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: u - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: 
NONE + Statistics: Num rows: 6 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE value expressions: a (type: int), b (type: int) Execution mode: llap LLAP IO: no inputs @@ -449,19 +449,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: v - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: a (type: int), b (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int) Execution mode: llap LLAP IO: no inputs @@ -476,14 +476,14 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col5, _col6 residual filter predicates: {((_col0 = _col5) or (_col1 <> _col6))} - Statistics: Num rows: 36 Data size: 612 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 284 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 612 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 284 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 36 Data size: 612 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 284 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -533,15 +533,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: u - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: a is not null (type: boolean) - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: a (type: int) sort order: + Map-reduce partition columns: a (type: int) - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE value expressions: b (type: int) Execution mode: llap LLAP IO: no inputs @@ -549,24 +549,24 @@ STAGE PLANS: Map Operator Tree: TableScan alias: v - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: a is not null (type: boolean) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: a (type: int) outputColumnNames: _col0 - Statistics: Num rows: 6 Data 
size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -579,10 +579,10 @@ STAGE PLANS: 0 a (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -636,15 +636,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: u - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: a is not null (type: boolean) - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: a (type: int) sort order: + Map-reduce partition columns: a (type: int) - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE value expressions: b (type: int) Execution mode: llap LLAP IO: no inputs @@ -652,24 +652,24 @@ STAGE PLANS: Map Operator Tree: TableScan alias: v - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: a is not null (type: boolean) - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: b (type: int), a (type: int) outputColumnNames: _col2, _col3 - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col2 (type: int), _col2 (type: int), _col2 (type: int), _col3 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: int) sort order: + Map-reduce partition columns: _col3 (type: int) - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Execution mode: llap LLAP IO: no inputs @@ 
-684,14 +684,14 @@ STAGE PLANS: 1 _col3 (type: int) outputColumnNames: _col0, _col1, _col5 residual filter predicates: {((_col0 + _col5) > 400)} {(CASE WHEN ((_col0 > 3)) THEN (true) WHEN ((_col5 > 1900)) THEN (true) ELSE (false) END or ((COALESCE(_col0) + COALESCE(_col5)) > 1900))} - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/skewjoin.q.out ql/src/test/results/clientpositive/llap/skewjoin.q.out index b102bca4fe..67c6e21e33 100644 --- ql/src/test/results/clientpositive/llap/skewjoin.q.out +++ ql/src/test/results/clientpositive/llap/skewjoin.q.out @@ -89,7 +89,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -111,7 +112,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan alias: src2 @@ -154,6 +155,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 809 Data size: 76855 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -171,6 +200,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest_j1 PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT 
OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value diff --git ql/src/test/results/clientpositive/llap/skewjoinopt15.q.out ql/src/test/results/clientpositive/llap/skewjoinopt15.q.out index 372f136f60..19c3acb6c1 100644 --- ql/src/test/results/clientpositive/llap/skewjoinopt15.q.out +++ ql/src/test/results/clientpositive/llap/skewjoinopt15.q.out @@ -88,19 +88,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 6 Data size: 1128 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 6 Data size: 1128 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), val (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 1128 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 1128 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -108,19 +108,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 6 Data size: 1128 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 6 Data size: 1128 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), val (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 1128 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 1128 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -134,10 +134,10 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6 Data size: 1240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 1260 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 1240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 1260 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -189,16 +189,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 6 Data size: 1128 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), val (type: string) outputColumnNames: _col0, _col1 - Statistics: Num 
rows: 6 Data size: 1128 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 1128 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -206,16 +206,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 6 Data size: 1128 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), val (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 1128 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 1128 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -229,10 +229,10 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6 Data size: 1240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 1260 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 1240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 1260 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -287,38 +287,38 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) 
outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -330,15 +330,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: llap @@ -347,10 +347,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -396,32 +396,32 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -433,15 +433,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 
_col0 (type: int) - Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: llap @@ -450,10 +450,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/smb_cache.q.out ql/src/test/results/clientpositive/llap/smb_cache.q.out index 60d4ff0ba0..defade5c08 100644 --- ql/src/test/results/clientpositive/llap/smb_cache.q.out +++ ql/src/test/results/clientpositive/llap/smb_cache.q.out @@ -211,26 +211,26 @@ Stage-0 Stage-1 Reducer 2 llap File Output Operator [FS_10] - Merge Join Operator [MERGEJOIN_15] (rows=551 width=3) + Merge Join Operator [MERGEJOIN_15] (rows=130 width=8) Conds:RS_6._col0=RS_7._col0(Inner),Output:["_col0","_col1"] <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_6] PartitionCols:_col0 - Select Operator [SEL_2] (rows=124 width=3) + Select Operator [SEL_2] (rows=130 width=4) Output:["_col0"] - Filter Operator [FIL_13] (rows=124 width=3) + Filter Operator [FIL_13] (rows=130 width=4) predicate:userid is not null - TableScan [TS_0] (rows=130 width=3) - default@bug_201_input_b,t1,Tbl:COMPLETE,Col:NONE,Output:["userid"] + TableScan [TS_0] (rows=130 width=4) + default@bug_201_input_b,t1,Tbl:COMPLETE,Col:COMPLETE,Output:["userid"] <-Map 3 [SIMPLE_EDGE] llap SHUFFLE [RS_7] PartitionCols:_col0 - Select Operator [SEL_5] (rows=501 width=3) + Select Operator [SEL_5] (rows=527 width=4) Output:["_col0"] - Filter Operator [FIL_14] (rows=501 width=3) + Filter Operator [FIL_14] (rows=527 width=4) predicate:userid is not null - TableScan [TS_3] (rows=527 width=3) - default@bug_201_input_a,fa,Tbl:COMPLETE,Col:NONE,Output:["userid"] + TableScan [TS_3] (rows=527 width=4) + default@bug_201_input_a,fa,Tbl:COMPLETE,Col:COMPLETE,Output:["userid"] PREHOOK: query: select t1.userid, @@ -291,26 +291,26 @@ Stage-0 Stage-1 Reducer 2 llap File Output Operator [FS_10] - Merge Join Operator [MERGEJOIN_15] (rows=551 width=3) + Merge Join Operator [MERGEJOIN_15] (rows=130 width=8) Conds:RS_6._col0=RS_7._col0(Inner),Output:["_col0","_col1"] <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_6] PartitionCols:_col0 - Select Operator [SEL_2] (rows=124 width=3) + Select Operator [SEL_2] (rows=130 width=4) Output:["_col0"] - Filter Operator [FIL_13] (rows=124 width=3) + Filter Operator [FIL_13] (rows=130 width=4) predicate:userid is not null - TableScan [TS_0] (rows=130 width=3) - default@bug_201_input_b,t1,Tbl:COMPLETE,Col:NONE,Output:["userid"] + 
diff --git ql/src/test/results/clientpositive/llap/smb_cache.q.out ql/src/test/results/clientpositive/llap/smb_cache.q.out
index 60d4ff0ba0..defade5c08 100644
--- ql/src/test/results/clientpositive/llap/smb_cache.q.out
+++ ql/src/test/results/clientpositive/llap/smb_cache.q.out
@@ -211,26 +211,26 @@ Stage-0
   Stage-1
     Reducer 2 llap
       File Output Operator [FS_10]
-        Merge Join Operator [MERGEJOIN_15] (rows=551 width=3)
+        Merge Join Operator [MERGEJOIN_15] (rows=130 width=8)
          Conds:RS_6._col0=RS_7._col0(Inner),Output:["_col0","_col1"]
        <-Map 1 [SIMPLE_EDGE] llap
          SHUFFLE [RS_6]
            PartitionCols:_col0
-            Select Operator [SEL_2] (rows=124 width=3)
+            Select Operator [SEL_2] (rows=130 width=4)
              Output:["_col0"]
-              Filter Operator [FIL_13] (rows=124 width=3)
+              Filter Operator [FIL_13] (rows=130 width=4)
                predicate:userid is not null
-                TableScan [TS_0] (rows=130 width=3)
-                  default@bug_201_input_b,t1,Tbl:COMPLETE,Col:NONE,Output:["userid"]
+                TableScan [TS_0] (rows=130 width=4)
+                  default@bug_201_input_b,t1,Tbl:COMPLETE,Col:COMPLETE,Output:["userid"]
        <-Map 3 [SIMPLE_EDGE] llap
          SHUFFLE [RS_7]
            PartitionCols:_col0
-            Select Operator [SEL_5] (rows=501 width=3)
+            Select Operator [SEL_5] (rows=527 width=4)
              Output:["_col0"]
-              Filter Operator [FIL_14] (rows=501 width=3)
+              Filter Operator [FIL_14] (rows=527 width=4)
                predicate:userid is not null
-                TableScan [TS_3] (rows=527 width=3)
-                  default@bug_201_input_a,fa,Tbl:COMPLETE,Col:NONE,Output:["userid"]
+                TableScan [TS_3] (rows=527 width=4)
+                  default@bug_201_input_a,fa,Tbl:COMPLETE,Col:COMPLETE,Output:["userid"]
 
 PREHOOK: query: select
 t1.userid,
@@ -291,26 +291,26 @@ Stage-0
   Stage-1
     Reducer 2 llap
       File Output Operator [FS_10]
-        Merge Join Operator [MERGEJOIN_15] (rows=551 width=3)
+        Merge Join Operator [MERGEJOIN_15] (rows=130 width=8)
          Conds:RS_6._col0=RS_7._col0(Inner),Output:["_col0","_col1"]
        <-Map 1 [SIMPLE_EDGE] llap
          SHUFFLE [RS_6]
            PartitionCols:_col0
-            Select Operator [SEL_2] (rows=124 width=3)
+            Select Operator [SEL_2] (rows=130 width=4)
              Output:["_col0"]
-              Filter Operator [FIL_13] (rows=124 width=3)
+              Filter Operator [FIL_13] (rows=130 width=4)
                predicate:userid is not null
-                TableScan [TS_0] (rows=130 width=3)
-                  default@bug_201_input_b,t1,Tbl:COMPLETE,Col:NONE,Output:["userid"]
+                TableScan [TS_0] (rows=130 width=4)
+                  default@bug_201_input_b,t1,Tbl:COMPLETE,Col:COMPLETE,Output:["userid"]
        <-Map 3 [SIMPLE_EDGE] llap
          SHUFFLE [RS_7]
            PartitionCols:_col0
-            Select Operator [SEL_5] (rows=501 width=3)
+            Select Operator [SEL_5] (rows=527 width=4)
              Output:["_col0"]
-              Filter Operator [FIL_14] (rows=501 width=3)
+              Filter Operator [FIL_14] (rows=527 width=4)
                predicate:userid is not null
-                TableScan [TS_3] (rows=527 width=3)
-                  default@bug_201_input_a,fa,Tbl:COMPLETE,Col:NONE,Output:["userid"]
+                TableScan [TS_3] (rows=527 width=4)
+                  default@bug_201_input_a,fa,Tbl:COMPLETE,Col:COMPLETE,Output:["userid"]
 
 PREHOOK: query: select
 t1.userid,
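The filter estimates in the hunks above and below shift for the same reason. Without column stats, "userid is not null" was costed with a fixed fallback fraction (130 -> 124, 527 -> 501 above, and 500 -> 475 further down, all consistent with a ~5% null heuristic); with stats, the recorded null count is applied directly. A hedged sketch of the two modes, with the 5% fallback an assumption for the demo rather than a documented Hive constant:

    // FilterSelectivitySketch.java -- illustrative only, not Hive code.
    public class FilterSelectivitySketch {
        // Fallback when no column stats exist: assume a fixed null fraction.
        static long isNotNullWithoutStats(long rows, double assumedNullFraction) {
            return Math.round(rows * (1.0 - assumedNullFraction));
        }

        // With column stats: subtract the recorded null count exactly.
        static long isNotNullWithStats(long rows, long numNulls) {
            return rows - numNulls;
        }

        public static void main(String[] args) {
            System.out.println(isNotNullWithoutStats(527, 0.05)); // ~501, the old estimate
            System.out.println(isNotNullWithStats(527, 0));       // 527, the new estimate
        }
    }

The range predicates in smb_mapjoin_14.q.out below behave analogously: with a known min/max for key, "(key < 6)" is costed at 3 of 10 rows instead of passing all 10 through.
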
diff --git ql/src/test/results/clientpositive/llap/smb_mapjoin_14.q.out ql/src/test/results/clientpositive/llap/smb_mapjoin_14.q.out
index b890886c8b..b74ce49a22 100644
--- ql/src/test/results/clientpositive/llap/smb_mapjoin_14.q.out
+++ ql/src/test/results/clientpositive/llap/smb_mapjoin_14.q.out
@@ -65,38 +65,38 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: a
-                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int)
                      outputColumnNames: _col0
-                      Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
        Map 4 
            Map Operator Tree:
                TableScan
                  alias: b
-                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int)
                      outputColumnNames: _col0
-                      Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
        Reducer 2 
@@ -108,15 +108,15 @@ STAGE PLANS:
                keys:
                  0 _col0 (type: int)
                  1 _col0 (type: int)
-                Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE
                Group By Operator
                  aggregations: count()
                  mode: hash
                  outputColumnNames: _col0
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    sort order: 
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                    value expressions: _col0 (type: bigint)
        Reducer 3 
            Execution mode: llap
@@ -125,10 +125,10 @@ STAGE PLANS:
                aggregations: count(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -189,38 +189,38 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: a
-                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int)
                      outputColumnNames: _col0
-                      Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
        Map 5 
            Map Operator Tree:
                TableScan
                  alias: b
-                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int)
                      outputColumnNames: _col0
-                      Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
        Reducer 2 
@@ -233,18 +233,18 @@ STAGE PLANS:
                  0 _col0 (type: int)
                  1 _col0 (type: int)
                outputColumnNames: _col0
-                Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
                Group By Operator
                  aggregations: count()
                  keys: _col0 (type: int)
                  mode: hash
                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    key expressions: _col0 (type: int)
                    sort order: +
                    Map-reduce partition columns: _col0 (type: int)
-                    Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
                    value expressions: _col1 (type: bigint)
        Reducer 3 
            Execution mode: llap
@@ -254,11 +254,11 @@ STAGE PLANS:
                keys: KEY._col0 (type: int)
                mode: mergepartial
                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
                Reduce Output Operator
                  key expressions: _col0 (type: int)
                  sort order: +
-                  Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
                  value expressions: _col1 (type: bigint)
        Reducer 4 
            Execution mode: llap
@@ -266,10 +266,10 @@ STAGE PLANS:
                Select Operator
                  expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: bigint)
                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
                  File Output Operator
                    compressed: false
-                    Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
                    table:
                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -345,38 +345,38 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: a
-                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int)
                      outputColumnNames: _col0
-                      Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
        Map 5 
            Map Operator Tree:
                TableScan
                  alias: b
-                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int)
                      outputColumnNames: _col0
-                      Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
        Reducer 2 
@@ -389,17 +389,17 @@ STAGE PLANS:
                  0 _col0 (type: int)
                  1 _col0 (type: int)
                outputColumnNames: _col0
-                Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
                Group By Operator
                  keys: _col0 (type: int)
                  mode: hash
                  outputColumnNames: _col0
-                  Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    key expressions: _col0 (type: int)
                    sort order: +
                    Map-reduce partition columns: _col0 (type: int)
-                    Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
        Reducer 3 
            Execution mode: llap
            Reduce Operator Tree:
@@ -407,17 +407,17 @@ STAGE PLANS:
                keys: KEY._col0 (type: int)
                mode: mergepartial
                outputColumnNames: _col0
-                Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
-                  Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                  Group By Operator
                    aggregations: count()
                    mode: hash
                    outputColumnNames: _col0
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                    Reduce Output Operator
                      sort order: 
-                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                      value expressions: _col0 (type: bigint)
        Reducer 4 
            Execution mode: llap
@@ -426,10 +426,10 @@ STAGE PLANS:
                aggregations: count(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -497,38 +497,38 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: a
-                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: (key < 6) (type: boolean)
-                    Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int)
                      outputColumnNames: _col0
-                      Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
        Map 4 
            Map Operator Tree:
                TableScan
                  alias: a
-                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: (key < 6) (type: boolean)
-                    Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int)
                      outputColumnNames: _col0
-                      Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
        Reducer 2 
@@ -540,15 +540,15 @@ STAGE PLANS:
                keys:
                  0 _col0 (type: int)
                  1 _col0 (type: int)
-                Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
                Group By Operator
                  aggregations: count()
                  mode: hash
                  outputColumnNames: _col0
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    sort order: 
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                    value expressions: _col0 (type: bigint)
        Reducer 3 
            Execution mode: llap
@@ -557,10 +557,10 @@ STAGE PLANS:
                aggregations: count(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -632,38 +632,38 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: a
-                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: (key < 6) (type: boolean)
-                    Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int)
                      outputColumnNames: _col0
-                      Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
        Map 4 
            Map Operator Tree:
                TableScan
                  alias: b
-                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: (key < 6) (type: boolean)
-                    Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int)
                      outputColumnNames: _col0
-                      Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
        Reducer 2 
@@ -675,15 +675,15 @@ STAGE PLANS:
                keys:
                  0 _col0 (type: int)
                  1 _col0 (type: int)
-                Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
                Group By Operator
                  aggregations: count()
                  mode: hash
                  outputColumnNames: _col0
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    sort order: 
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                    value expressions: _col0 (type: bigint)
        Reducer 3 
            Execution mode: llap
@@ -692,10 +692,10 @@ STAGE PLANS:
                aggregations: count(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -791,38 +791,38 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: a
-                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: (key < 6) (type: boolean)
-                    Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int)
                      outputColumnNames: _col0
-                      Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
        Map 4 
            Map Operator Tree:
                TableScan
                  alias: a
-                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: (key < 6) (type: boolean)
-                    Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int)
                      outputColumnNames: _col0
-                      Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
        Reducer 2 
@@ -834,15 +834,15 @@ STAGE PLANS:
                keys:
                  0 _col0 (type: int)
                  1 _col0 (type: int)
-                Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
                Group By Operator
                  aggregations: count()
                  mode: hash
                  outputColumnNames: _col0
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    sort order: 
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                    value expressions: _col0 (type: bigint)
        Reducer 3 
            Execution mode: llap
@@ -851,10 +851,10 @@ STAGE PLANS:
                aggregations: count(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -938,38 +938,38 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: a
-                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: (key < 8) (type: boolean)
-                    Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int)
                      outputColumnNames: _col0
-                      Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
        Map 4 
            Map Operator Tree:
                TableScan
                  alias: a
-                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: (key < 8) (type: boolean)
-                    Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int)
                      outputColumnNames: _col0
-                      Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
        Reducer 2 
@@ -981,15 +981,15 @@ STAGE PLANS:
                keys:
                  0 _col0 (type: int)
                  1 _col0 (type: int)
-                Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
                Group By Operator
                  aggregations: count()
                  mode: hash
                  outputColumnNames: _col0
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    sort order: 
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                    value expressions: _col0 (type: bigint)
        Reducer 3 
            Execution mode: llap
@@ -998,10 +998,10 @@ STAGE PLANS:
                aggregations: count(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1063,38 +1063,38 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: a
-                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: (key + 1) (type: int)
                      outputColumnNames: _col0
-                      Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
        Map 4 
            Map Operator Tree:
                TableScan
                  alias: a
-                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: (key + 1) (type: int)
                      outputColumnNames: _col0
-                      Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
        Reducer 2 
@@ -1106,15 +1106,15 @@ STAGE PLANS:
                keys:
                  0 _col0 (type: int)
                  1 _col0 (type: int)
-                Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE
                Group By Operator
                  aggregations: count()
                  mode: hash
                  outputColumnNames: _col0
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    sort order: 
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                    value expressions: _col0 (type: bigint)
        Reducer 3 
            Execution mode: llap
@@ -1123,10 +1123,10 @@ STAGE PLANS:
                aggregations: count(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1184,38 +1184,38 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: a
-                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: (key < 6) (type: boolean)
-                    Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int)
                      outputColumnNames: _col0
-                      Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
        Map 4 
            Map Operator Tree:
                TableScan
                  alias: a
-                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: (key < 6) (type: boolean)
-                    Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int)
                      outputColumnNames: _col0
-                      Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
        Reducer 2 
@@ -1227,15 +1227,15 @@ STAGE PLANS:
                keys:
                  0 _col0 (type: int)
                  1 _col0 (type: int)
-                Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
                Group By Operator
                  aggregations: count()
                  mode: hash
                  outputColumnNames: _col0
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    sort order: 
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                    value expressions: _col0 (type: bigint)
        Reducer 3 
            Execution mode: llap
@@ -1244,10 +1244,10 @@ STAGE PLANS:
                aggregations: count(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1301,38 +1301,38 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: a
-                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: (key < 6) (type: boolean)
-                    Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int)
                      outputColumnNames: _col0
-                      Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
        Map 4 
            Map Operator Tree:
                TableScan
                  alias: a
-                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: (key < 6) (type: boolean)
-                    Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int)
                      outputColumnNames: _col0
-                      Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
        Reducer 2 
@@ -1344,15 +1344,15 @@ STAGE PLANS:
                keys:
                  0 _col0 (type: int)
                  1 _col0 (type: int)
-                Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
                Group By Operator
                  aggregations: count()
                  mode: hash
                  outputColumnNames: _col0
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    sort order: 
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                    value expressions: _col0 (type: bigint)
        Reducer 3 
            Execution mode: llap
@@ -1361,10 +1361,10 @@ STAGE PLANS:
                aggregations: count(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1428,57 +1428,57 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: a
-                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: (key < 6) (type: boolean)
-                    Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int)
                      outputColumnNames: _col0
-                      Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
        Map 4 
            Map Operator Tree:
                TableScan
                  alias: a
-                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: (key < 6) (type: boolean)
-                    Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int)
                      outputColumnNames: _col0
-                      Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
        Map 5 
            Map Operator Tree:
                TableScan
                  alias: a
-                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: (key < 6) (type: boolean)
-                    Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int)
                      outputColumnNames: _col0
-                      Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
        Reducer 2 
@@ -1492,15 +1492,15 @@ STAGE PLANS:
                  0 _col0 (type: int)
                  1 _col0 (type: int)
                  2 _col0 (type: int)
-                Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
                Group By Operator
                  aggregations: count()
                  mode: hash
                  outputColumnNames: _col0
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    sort order: 
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                    value expressions: _col0 (type: bigint)
        Reducer 3 
            Execution mode: llap
@@ -1509,10 +1509,10 @@ STAGE PLANS:
                aggregations: count(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1592,38 +1592,38 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: a
-                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: (key < 6) (type: boolean)
-                    Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int)
                      outputColumnNames: _col0
-                      Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
        Map 4 
            Map Operator Tree:
                TableScan
                  alias: b
-                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: (key < 6) (type: boolean)
-                    Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int)
                      outputColumnNames: _col0
-                      Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
        Reducer 2 
@@ -1635,15 +1635,15 @@ STAGE PLANS:
                keys:
                  0 _col0 (type: int)
                  1 _col0 (type: int)
-                Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
                Group By Operator
                  aggregations: count()
                  mode: hash
                  outputColumnNames: _col0
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    sort order: 
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                    value expressions: _col0 (type: bigint)
        Reducer 3 
            Execution mode: llap
@@ -1652,10 +1652,10 @@ STAGE PLANS:
                aggregations: count(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
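smb_mapjoin_15.q.out below also shows the table-property side of the change: COLUMN_STATS_ACCURATE grows a COLUMN_STATS map marking each column whose statistics are trustworthy. The sketch below reproduces that JSON shape; it is a stand-alone illustration (Hive's real bookkeeping lives in its own utility classes, e.g. around StatsSetupConst), and the method name and layout here are assumed for the demo.

    // ColumnStatsAccurateSketch.java -- illustrative only, not Hive code.
    public class ColumnStatsAccurateSketch {
        static String marker(boolean basicStats, String... accurateColumns) {
            StringBuilder sb = new StringBuilder();
            sb.append("{\"BASIC_STATS\":\"").append(basicStats).append("\"");
            if (accurateColumns.length > 0) {
                sb.append(",\"COLUMN_STATS\":{");
                for (int i = 0; i < accurateColumns.length; i++) {
                    if (i > 0) sb.append(",");
                    sb.append("\"").append(accurateColumns[i]).append("\":\"true\"");
                }
                sb.append("}");
            }
            return sb.append("}").toString();
        }

        public static void main(String[] args) {
            // Prints {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}},
            // the exact property value in the hunks below.
            System.out.println(marker(true, "key", "value"));
        }
    }
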
diff --git ql/src/test/results/clientpositive/llap/smb_mapjoin_15.q.out ql/src/test/results/clientpositive/llap/smb_mapjoin_15.q.out
index 79c896fe52..490afb69a9 100644
--- ql/src/test/results/clientpositive/llap/smb_mapjoin_15.q.out
+++ ql/src/test/results/clientpositive/llap/smb_mapjoin_15.q.out
@@ -55,22 +55,22 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: a
-                  Statistics: Num rows: 500 Data size: 89488 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
                  GatherStats: false
                  Filter Operator
                    isSamplingPred: false
                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 475 Data size: 85013 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int), value (type: string)
                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 475 Data size: 85013 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        null sort order: a
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 475 Data size: 85013 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
                        tag: 0
                        value expressions: _col1 (type: string)
                        auto parallelism: true
@@ -85,7 +85,7 @@ STAGE PLANS:
                    input format: org.apache.hadoop.mapred.TextInputFormat
                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                    properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
                      SORTBUCKETCOLSPREFIX TRUE
                      bucket_count 16
                      bucket_field_name key
@@ -108,7 +108,7 @@ STAGE PLANS:
                    input format: org.apache.hadoop.mapred.TextInputFormat
                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                    properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
                      SORTBUCKETCOLSPREFIX TRUE
                      bucket_count 16
                      bucket_field_name key
@@ -135,22 +135,22 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: b
-                  Statistics: Num rows: 500 Data size: 89488 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
                  GatherStats: false
                  Filter Operator
                    isSamplingPred: false
                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 475 Data size: 85013 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int), value (type: string)
                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 475 Data size: 85013 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        null sort order: a
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 475 Data size: 85013 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
                        tag: 1
                        value expressions: _col1 (type: string)
                        auto parallelism: true
@@ -165,7 +165,7 @@ STAGE PLANS:
                    input format: org.apache.hadoop.mapred.TextInputFormat
                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                    properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
                      SORTBUCKETCOLSPREFIX TRUE
                      bucket_count 16
                      bucket_field_name key
@@ -188,7 +188,7 @@ STAGE PLANS:
                    input format: org.apache.hadoop.mapred.TextInputFormat
                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                    properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
                      SORTBUCKETCOLSPREFIX TRUE
                      bucket_count 16
                      bucket_field_name key
@@ -223,12 +223,12 @@ STAGE PLANS:
                  1 _col0 (type: int)
                outputColumnNames: _col0, _col1, _col2, _col3
                Position of Big Table: 0
-                Statistics: Num rows: 522 Data size: 93514 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 809 Data size: 153710 Basic stats: COMPLETE Column stats: COMPLETE
                Reduce Output Operator
                  key expressions: _col0 (type: int)
                  null sort order: a
                  sort order: +
-                  Statistics: Num rows: 522 Data size: 93514 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 809 Data size: 153710 Basic stats: COMPLETE Column stats: COMPLETE
                  tag: -1
                  TopN: 10
                  TopN Hash Memory Usage: 0.1
@@ -241,16 +241,16 @@ STAGE PLANS:
            Select Operator
              expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: string)
              outputColumnNames: _col0, _col1, _col2, _col3
-              Statistics: Num rows: 522 Data size: 93514 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 809 Data size: 153710 Basic stats: COMPLETE Column stats: COMPLETE
              Limit
                Number of rows: 10
-                Statistics: Num rows: 10 Data size: 1790 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 10 Data size: 1900 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
                  GlobalTableId: 0
#### A masked pattern was here ####
                  NumFilesPerFileSink: 1
-                  Statistics: Num rows: 10 Data size: 1790 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 10 Data size: 1900 Basic stats: COMPLETE Column stats: COMPLETE
#### A masked pattern was here ####
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -369,22 +369,22 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: a
-                  Statistics: Num rows: 500 Data size: 91392 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE
                  GatherStats: false
                  Filter Operator
                    isSamplingPred: false
                    predicate: (key is not null and key2 is not null) (type: boolean)
-                    Statistics: Num rows: 450 Data size: 82252 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int), key2 (type: int), value (type: string)
                      outputColumnNames: _col0, _col1, _col2
-                      Statistics: Num rows: 450 Data size: 82252 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int), _col1 (type: int)
                        null sort order: aa
                        sort order: ++
                        Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
-                        Statistics: Num rows: 450 Data size: 82252 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE
                        tag: 0
                        value expressions: _col2 (type: string)
                        auto parallelism: true
@@ -399,7 +399,7 @@ STAGE PLANS:
                    input format: org.apache.hadoop.mapred.TextInputFormat
                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                    properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}}
                      SORTBUCKETCOLSPREFIX TRUE
                      bucket_count 16
                      bucket_field_name key
@@ -422,7 +422,7 @@ STAGE PLANS:
                    input format: org.apache.hadoop.mapred.TextInputFormat
                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                    properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}}
                      SORTBUCKETCOLSPREFIX TRUE
                      bucket_count 16
                      bucket_field_name key
@@ -449,22 +449,22 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: b
-                  Statistics: Num rows: 500 Data size: 91392 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE
                  GatherStats: false
                  Filter Operator
                    isSamplingPred: false
                    predicate: (key is not null and key2 is not null) (type: boolean)
-                    Statistics: Num rows: 450 Data size: 82252 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int), key2 (type: int), value (type: string)
                      outputColumnNames: _col0, _col1, _col2
-                      Statistics: Num rows: 450 Data size: 82252 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int), _col1 (type: int)
                        null sort order: aa
                        sort order: ++
                        Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
-                        Statistics: Num rows: 450 Data size: 82252 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE
                        tag: 1
                        value expressions: _col2 (type: string)
                        auto parallelism: true
@@ -479,7 +479,7 @@ STAGE PLANS:
                    input format: org.apache.hadoop.mapred.TextInputFormat
                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                    properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}}
                      SORTBUCKETCOLSPREFIX TRUE
                      bucket_count 16
                      bucket_field_name key
@@ -502,7 +502,7 @@ STAGE PLANS:
                    input format: org.apache.hadoop.mapred.TextInputFormat
                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                    properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}}
                      SORTBUCKETCOLSPREFIX TRUE
                      bucket_count 16
                      bucket_field_name key
@@ -537,12 +537,12 @@ STAGE PLANS:
                  1 _col0 (type: int), _col1 (type: int)
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
                Position of Big Table: 0
-                Statistics: Num rows: 495 Data size: 90477 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 809 Data size: 160182 Basic stats: COMPLETE Column stats: COMPLETE
                Reduce Output Operator
                  key expressions: _col0 (type: int)
                  null sort order: a
                  sort order: +
-                  Statistics: Num rows: 495 Data size: 90477 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 809 Data size: 160182 Basic stats: COMPLETE Column stats: COMPLETE
                  tag: -1
                  TopN: 10
                  TopN Hash Memory Usage: 0.1
@@ -555,16 +555,16 @@ STAGE PLANS:
            Select Operator
              expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int), VALUE._col1 (type: string), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: string)
              outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-              Statistics: Num rows: 495 Data size: 90477 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 809 Data size: 160182 Basic stats: COMPLETE Column stats: COMPLETE
              Limit
                Number of rows: 10
-                Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 10 Data size: 1980 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
                  GlobalTableId: 0
#### A masked pattern was here ####
                  NumFilesPerFileSink: 1
-                  Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 10 Data size: 1980 Basic stats: COMPLETE Column stats: COMPLETE
#### A masked pattern was here ####
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -631,22 +631,22 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: a
-                  Statistics: Num rows: 500 Data size: 91392 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE
                  GatherStats: false
                  Filter Operator
                    isSamplingPred: false
                    predicate: (key is not null and key2 is not null) (type: boolean)
-                    Statistics: Num rows: 450 Data size: 82252 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int), key2 (type: int), value (type: string)
                      outputColumnNames: _col0, _col1, _col2
-                      Statistics: Num rows: 450 Data size: 82252 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int), _col1 (type: int)
                        null sort order: aa
                        sort order: ++
                        Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
-                        Statistics: Num rows: 450 Data size: 82252 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE
                        tag: 0
                        value expressions: _col2 (type: string)
                        auto parallelism: true
@@ -661,7 +661,7 @@ STAGE PLANS:
                    input format: org.apache.hadoop.mapred.TextInputFormat
                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                    properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}}
                      SORTBUCKETCOLSPREFIX TRUE
                      bucket_count 16
                      bucket_field_name key
@@ -684,7 +684,7 @@ STAGE PLANS:
                    input format: org.apache.hadoop.mapred.TextInputFormat
                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                    properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}}
                      SORTBUCKETCOLSPREFIX TRUE
                      bucket_count 16
                      bucket_field_name key
@@ -711,22 +711,22 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: b
-                  Statistics: Num rows: 500 Data size: 91392 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE
                  GatherStats: false
                  Filter Operator
                    isSamplingPred: false
                    predicate: (key is not null and key2 is not null) (type: boolean)
-                    Statistics: Num rows: 450 Data size: 82252 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int), key2 (type: int), value (type: string)
                      outputColumnNames: _col0, _col1, _col2
-                      Statistics: Num rows: 450 Data size: 82252 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int), _col1 (type: int)
                        null sort order: aa
                        sort order: ++
                        Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
-                        Statistics: Num rows: 450 Data size: 82252 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE
                        tag: 1
                        value expressions: _col2 (type: string)
                        auto parallelism: true
@@ -741,7 +741,7 @@ STAGE PLANS:
                    input format: org.apache.hadoop.mapred.TextInputFormat
                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                    properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}}
                      SORTBUCKETCOLSPREFIX TRUE
                      bucket_count 16
                      bucket_field_name key
@@ -764,7 +764,7 @@ STAGE PLANS:
                    input format: org.apache.hadoop.mapred.TextInputFormat
                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                    properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}}
                      SORTBUCKETCOLSPREFIX TRUE
                      bucket_count 16
                      bucket_field_name key
@@ -799,12 +799,12 @@ STAGE PLANS:
                  1 _col0 (type: int), _col1 (type: int)
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
                Position of Big Table: 0
-                Statistics: Num rows: 495 Data size: 90477 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 809 Data size: 160182 Basic stats: COMPLETE Column stats: COMPLETE
                Reduce Output Operator
                  key expressions: _col0 (type: int)
                  null sort order: a
                  sort order: +
-                  Statistics: Num rows: 495 Data size: 90477 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 809 Data size: 160182 Basic stats: COMPLETE Column stats: COMPLETE
                  tag: -1
                  TopN: 10
                  TopN Hash Memory Usage: 0.1
@@ -817,16 +817,16 @@ STAGE PLANS:
            Select Operator
              expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int), VALUE._col1 (type: string), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: string)
              outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-              Statistics: Num rows: 495 Data size: 90477 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 809 Data size: 160182 Basic stats: COMPLETE Column stats: COMPLETE
              Limit
                Number of rows: 10
-                Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 10 Data size: 1980 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
                  GlobalTableId: 0
#### A masked pattern was here ####
                  NumFilesPerFileSink: 1
-                  Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 10 Data size: 1980 Basic stats: COMPLETE Column stats: COMPLETE
#### A masked pattern was here ####
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -893,22 +893,22 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: a
-                  Statistics: Num rows: 500 Data size: 91392 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE
                  GatherStats: false
                  Filter Operator
                    isSamplingPred: false
                    predicate: (key is not null and value is not null) (type: boolean)
-                    Statistics: Num rows: 450 Data size: 82252 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int), key2 (type: int), value (type: string)
                      outputColumnNames: _col0, _col1, _col2
-                      Statistics: Num rows: 450 Data size: 82252 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int), _col2 (type: string)
                        null sort order: aa
                        sort order: ++
                        Map-reduce partition columns: _col0 (type: int), _col2 (type: string)
-                        Statistics: Num rows: 450 Data size: 82252 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE
                        tag: 0
                        value expressions: _col1 (type: int)
                        auto parallelism: true
@@ -923,7 +923,7 @@ STAGE PLANS:
                    input format: org.apache.hadoop.mapred.TextInputFormat
                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                    properties:
-                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}}
                      SORTBUCKETCOLSPREFIX TRUE
                      bucket_count 16
                      bucket_field_name key
@@ -946,7
@@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -973,22 +973,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 500 Data size: 91392 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 450 Data size: 82252 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), key2 (type: int), value (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 450 Data size: 82252 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col2 (type: string) null sort order: aa sort order: ++ Map-reduce partition columns: _col0 (type: int), _col2 (type: string) - Statistics: Num rows: 450 Data size: 82252 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 value expressions: _col1 (type: int) auto parallelism: true @@ -1003,7 +1003,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -1026,7 +1026,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -1061,12 +1061,12 @@ STAGE PLANS: 1 _col0 (type: int), _col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Position of Big Table: 0 - Statistics: Num rows: 495 Data size: 90477 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 809 Data size: 160182 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: a sort order: + - Statistics: Num rows: 495 Data size: 90477 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 809 Data size: 160182 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 TopN: 10 TopN Hash Memory Usage: 0.1 @@ -1079,16 +1079,16 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int), VALUE._col1 (type: string), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 495 Data size: 90477 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 809 Data size: 160182 Basic stats: COMPLETE Column stats: 
COMPLETE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1980 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1980 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/smb_mapjoin_17.q.out ql/src/test/results/clientpositive/llap/smb_mapjoin_17.q.out index 077545995e..803f40487a 100644 --- ql/src/test/results/clientpositive/llap/smb_mapjoin_17.q.out +++ ql/src/test/results/clientpositive/llap/smb_mapjoin_17.q.out @@ -193,133 +193,133 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 5 Map Operator Tree: TableScan alias: c - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 
10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 6 Map Operator Tree: TableScan alias: d - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 7 Map Operator Tree: TableScan alias: e - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 8 Map Operator Tree: TableScan alias: f - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 9 Map Operator Tree: TableScan alias: g - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 
10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -341,15 +341,15 @@ STAGE PLANS: 4 _col0 (type: int) 5 _col0 (type: int) 6 _col0 (type: int) - Statistics: Num rows: 66 Data size: 264 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 214 Data size: 1712 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: llap @@ -358,10 +358,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -440,59 +440,59 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: c - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: d - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + 
Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: e - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: f - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: g - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Left Outer Join 0 to 1 @@ -509,15 +509,15 @@ STAGE PLANS: 4 _col0 (type: int) 5 _col0 (type: int) 6 _col0 (type: int) - Statistics: Num rows: 66 Data size: 264 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 214 Data size: 1712 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap Reducer 2 @@ -527,10 +527,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -611,67 +611,67 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: 
COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: c - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: d - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: e - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: f - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: g - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: h - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Left Outer Join 0 to 1 @@ -690,15 +690,15 @@ STAGE PLANS: 5 _col0 (type: int) 6 _col0 (type: int) 7 _col0 (type: int) - Statistics: Num rows: 77 Data size: 308 
Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 357 Data size: 2856 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap Reducer 2 @@ -708,10 +708,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -822,131 +822,131 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: c - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: d - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: e - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: f - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: 
g - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: h - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: i - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: j - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: k - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: l - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: m - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: n - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: o 
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: p - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 1880 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 1880 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Left Outer Join 0 to 1 @@ -982,76 +982,76 @@ STAGE PLANS: 8 _col0 (type: int) 9 _col0 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 165 Data size: 31020 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 21268 Data size: 1977924 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 165 Data size: 31020 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 21268 Data size: 1977924 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap Map 18 Map Operator Tree: TableScan alias: q - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 19 Map Operator Tree: TableScan alias: r - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE 
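The estimate revisions in the smb_mapjoin_17.q.out hunks above (for example, the Merge Join Operator output growing from 66 to 214 rows, and the wider merge join from 165 to 21268 rows, once plans report Column stats: COMPLETE) reflect the optimizer sizing joins from per-column NDV and null counts instead of the fallback heuristics used when column stats are NONE. A minimal HiveQL sketch for gathering and inspecting such statistics by hand; smb_bucket is a placeholder name, since these hunks show only the aliases a through t, not the underlying tables:

    -- Gather column statistics explicitly (what autogather now does at insert time)
    ANALYZE TABLE smb_bucket COMPUTE STATISTICS FOR COLUMNS;
    -- Show per-column stats for the join key; distinct and null counts drive the estimates
    DESCRIBE FORMATTED smb_bucket key;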
Execution mode: llap LLAP IO: no inputs Map 20 Map Operator Tree: TableScan alias: s - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 21 Map Operator Tree: TableScan alias: t - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1070,10 +1070,10 @@ STAGE PLANS: 3 _col0 (type: int) 4 _col0 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 726 Data size: 136488 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 164104 Data size: 15261672 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 726 Data size: 136488 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 164104 Data size: 15261672 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/smb_mapjoin_18.q.out ql/src/test/results/clientpositive/llap/smb_mapjoin_18.q.out index 135f83eca9..6580938944 100644 --- ql/src/test/results/clientpositive/llap/smb_mapjoin_18.q.out +++ ql/src/test/results/clientpositive/llap/smb_mapjoin_18.q.out @@ -53,14 +53,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 500 Data size: 94800 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 94800 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 500 Data size: 94800 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -87,6 +87,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, 
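In the smb_mapjoin_18.q.out hunks around this point, the partitioned INSERT OVERWRITE into test_table2 picks up a new Reducer that runs compute_stats(key, 'hll') and compute_stats(value, 'hll') grouped by the partition column ds, and the Stats Work stage gains a Column Stats Desc entry. A hedged sketch of the manual equivalent, using the table and partition spelled out in these hunks:

    -- What the added plan stages now compute automatically on insert;
    -- 'hll' is the NDV estimator named in the plan's compute_stats calls.
    ANALYZE TABLE test_table2 PARTITION (ds = '2') COMPUTE STATISTICS FOR COLUMNS;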
value + Column Types: int, string + Table: default.test_table2 PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' @@ -228,25 +232,26 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 500 Data size: 94800 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key = 238) (type: boolean) - Statistics: Num rows: 5 Data size: 948 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 238 (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 948 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 5 Data size: 948 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -256,15 +261,51 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 948 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 948 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '2' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 2 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + 
compressed: false + Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -284,6 +325,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '2') SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' and a.key = 238 @@ -375,14 +420,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 2 Data size: 398 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 398 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 398 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -409,6 +454,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '2') SELECT a.key, a.value FROM test_table2 a WHERE a.ds = '2' diff --git ql/src/test/results/clientpositive/llap/smb_mapjoin_19.q.out ql/src/test/results/clientpositive/llap/smb_mapjoin_19.q.out index 64763fae40..97e74517ee 100644 --- ql/src/test/results/clientpositive/llap/smb_mapjoin_19.q.out +++ ql/src/test/results/clientpositive/llap/smb_mapjoin_19.q.out @@ -53,14 +53,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 500 Data size: 94800 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 94800 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 500 Data size: 94800 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -87,6 +87,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' diff --git ql/src/test/results/clientpositive/llap/smb_mapjoin_6.q.out ql/src/test/results/clientpositive/llap/smb_mapjoin_6.q.out index 07e669fff7..57c8ed0189 100644 --- ql/src/test/results/clientpositive/llap/smb_mapjoin_6.q.out +++ 
ql/src/test/results/clientpositive/llap/smb_mapjoin_6.q.out @@ -73,46 +73,47 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 500 Data size: 89488 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 475 Data size: 85013 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 475 Data size: 85013 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 475 Data size: 85013 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 500 Data size: 89488 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 475 Data size: 85013 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 475 Data size: 85013 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 475 Data size: 85013 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -126,15 +127,43 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 522 Data size: 93514 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 809 Data size: 153710 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 522 Data size: 93514 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 809 Data size: 153710 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.smb_join_results + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: k1, v1, k2, v2 + Statistics: Num rows: 809 Data size: 153710 Basic stats: COMPLETE Column stats: COMPLETE + Group 
By Operator + aggregations: compute_stats(k1, 'hll'), compute_stats(v1, 'hll'), compute_stats(k2, 'hll'), compute_stats(v2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -152,6 +181,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: k1, v1, k2, v2 + Column Types: int, string, int, string + Table: default.smb_join_results PREHOOK: query: insert overwrite table smb_join_results select /*+mapjoin(a)*/ * from smb_bucket4_1 a join smb_bucket4_2 b on a.key = b.key @@ -1256,46 +1289,47 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 500 Data size: 89488 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 475 Data size: 85013 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 475 Data size: 85013 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 475 Data size: 85013 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 500 Data size: 89488 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 475 Data size: 85013 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - 
Statistics: Num rows: 475 Data size: 85013 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 475 Data size: 85013 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1309,15 +1343,43 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 522 Data size: 93514 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 809 Data size: 153710 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 522 Data size: 93514 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 809 Data size: 153710 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.smb_join_results + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: k1, v1, k2, v2 + Statistics: Num rows: 809 Data size: 153710 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(k1, 'hll'), compute_stats(v1, 'hll'), compute_stats(k2, 'hll'), compute_stats(v2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1335,6 +1397,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: k1, v1, k2, v2 + Column Types: int, string, int, string + Table: default.smb_join_results PREHOOK: query: insert overwrite table smb_join_results select /*+mapjoin(b)*/ * from smb_bucket4_1 a join smb_bucket4_2 b on a.key = b.key @@ -2455,46 +2521,47 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 500 Data size: 89488 Basic stats: COMPLETE Column stats: NONE + 
Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key > 1000) (type: boolean) - Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 500 Data size: 89488 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key > 1000) (type: boolean) - Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -2508,15 +2575,43 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 195 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 195 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.smb_join_results + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: k1, v1, k2, v2 + Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(k1, 'hll'), compute_stats(v1, 'hll'), compute_stats(k2, 'hll'), compute_stats(v2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 3 + Execution mode: llap + Reduce 
Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -2534,6 +2629,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: k1, v1, k2, v2 + Column Types: int, string, int, string + Table: default.smb_join_results PREHOOK: query: insert overwrite table smb_join_results select /*+mapjoin(a)*/ * from smb_bucket4_1 a join smb_bucket4_2 b on a.key = b.key where a.key>1000 @@ -2570,46 +2669,47 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 500 Data size: 89488 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key > 1000) (type: boolean) - Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 500 Data size: 89488 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key > 1000) (type: boolean) - Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -2623,15 +2723,43 @@ STAGE PLANS: 0 _col0 (type: 
int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 195 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 195 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.smb_join_results + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: k1, v1, k2, v2 + Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(k1, 'hll'), compute_stats(v1, 'hll'), compute_stats(k2, 'hll'), compute_stats(v2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -2649,6 +2777,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: k1, v1, k2, v2 + Column Types: int, string, int, string + Table: default.smb_join_results PREHOOK: query: insert overwrite table smb_join_results select /*+mapjoin(b)*/ * from smb_bucket4_1 a join smb_bucket4_2 b on a.key = b.key where a.key>1000 @@ -2688,19 +2820,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 500 Data size: 89488 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key > 1000) (type: boolean) - Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE 
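-- The Select / Group By / Reducer pattern appended above is the automatic
-- column-statistics pipeline: each map task feeds compute_stats(col, 'hll')
-- in hash mode to build partial HyperLogLog sketches, and a single stats
-- reducer merges them (mode: mergepartial) before the Stats Work stage
-- persists the result. A sketch of the manual equivalent for the target
-- table of these plans (same columns as the Column Stats Desc stanza):
ANALYZE TABLE default.smb_join_results COMPUTE STATISTICS FOR COLUMNS k1, v1, k2, v2;
-- Individual column statistics can then be inspected with:
DESCRIBE FORMATTED default.smb_join_results k1;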
value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -2708,19 +2840,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 500 Data size: 89488 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key > 1000) (type: boolean) - Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -2728,19 +2860,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: c - Statistics: Num rows: 500 Data size: 89488 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key > 1000) (type: boolean) - Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -2756,10 +2888,10 @@ STAGE PLANS: 1 _col0 (type: int) 2 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 391 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 285 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 391 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 285 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/sqlmerge.q.out ql/src/test/results/clientpositive/llap/sqlmerge.q.out index 2a6a0f32de..2ff590ea0c 100644 --- ql/src/test/results/clientpositive/llap/sqlmerge.q.out +++ ql/src/test/results/clientpositive/llap/sqlmerge.q.out @@ -41,11 +41,12 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 2 (SIMPLE_EDGE) Reducer 5 <- Reducer 2 (SIMPLE_EDGE) Reducer 6 <- Reducer 2 
(CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -61,7 +62,7 @@ STAGE PLANS: value expressions: ROW__ID (type: struct) Execution mode: llap LLAP IO: may be used (ACID table) - Map 7 + Map 8 Map Operator Tree: TableScan alias: s @@ -197,6 +198,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.merge_tmp_table + Select Operator + expressions: _col0 (type: int) + outputColumnNames: val + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(val, 'hll') + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 432 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 432 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 432 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: llap Reduce Operator Tree: @@ -213,6 +234,34 @@ STAGE PLANS: serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.acidtbl Write Type: INSERT + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: a, b + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-5 Dependency Collection @@ -260,6 +309,10 @@ STAGE PLANS: Stage: Stage-8 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: val + Column Types: int + Table: default.merge_tmp_table Stage: Stage-1 Move Operator @@ -275,6 +328,10 @@ STAGE PLANS: Stage: Stage-9 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: a, b + Column Types: int, int + Table: default.acidtbl PREHOOK: query: explain merge into acidTbl as t using nonAcidOrcTbl s ON t.a = s.a WHEN NOT MATCHED THEN INSERT VALUES(s.a, s.b) @@ -293,8 +350,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here 
#### Vertices: Map 1 @@ -309,7 +367,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: may be used (ACID table) - Map 4 + Map 5 Map Operator Tree: TableScan alias: s @@ -361,6 +419,34 @@ STAGE PLANS: serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.acidtbl Write Type: INSERT + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: a, b + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -379,4 +465,8 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: a, b + Column Types: int, int + Table: default.acidtbl diff --git ql/src/test/results/clientpositive/llap/stats11.q.out ql/src/test/results/clientpositive/llap/stats11.q.out index 66c87c3be9..456a39694b 100644 --- ql/src/test/results/clientpositive/llap/stats11.q.out +++ ql/src/test/results/clientpositive/llap/stats11.q.out @@ -306,7 +306,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -385,7 +386,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin Truncated Path -> Alias: /srcbucket_mapjoin [a] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -512,6 +513,53 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 58 Data size: 16921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + 
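-- In the sqlmerge plans above, each write target gets its own stats branch
-- and its own Stats Work stage: one for default.merge_tmp_table (column val)
-- and one for default.acidtbl (columns a, b). The driving statement is the
-- PREHOOK query shown in the diff:
MERGE INTO acidTbl AS t USING nonAcidOrcTbl s ON t.a = s.a
WHEN NOT MATCHED THEN INSERT VALUES (s.a, s.b);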
outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -548,6 +596,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(b)*/ a.key, a.value, b.value @@ -673,7 +726,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -752,7 +806,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin Truncated Path -> Alias: /srcbucket_mapjoin [a] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -858,7 +912,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -879,6 +933,53 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 58 Data size: 16921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE 
Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -892,7 +993,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -915,6 +1016,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(a)*/ a.key, a.value, b.value diff --git ql/src/test/results/clientpositive/llap/subquery_exists.q.out ql/src/test/results/clientpositive/llap/subquery_exists.q.out index dfe424046e..a7fbbd191d 100644 --- ql/src/test/results/clientpositive/llap/subquery_exists.q.out +++ ql/src/test/results/clientpositive/llap/subquery_exists.q.out @@ -976,14 +976,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: i (type: int) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Execution mode: llap LLAP IO: no inputs @@ -1018,10 +1018,10 @@ STAGE PLANS: 0 1 outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1125,19 +1125,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: u - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: a is not null (type: boolean) - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: a (type: int), b 
(type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int) Execution mode: llap LLAP IO: no inputs @@ -1145,24 +1145,24 @@ STAGE PLANS: Map Operator Tree: TableScan alias: v - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (a is not null and b is not null) (type: boolean) - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: a (type: int), b (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int) Execution mode: llap LLAP IO: no inputs @@ -1177,17 +1177,17 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col1, _col3 residual filter predicates: {(_col1 <> _col3)} - Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: llap @@ -1196,14 +1196,14 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: bigint), 3 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 
8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1275,19 +1275,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: i is not null (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: i (type: int), j (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int) Execution mode: llap LLAP IO: no inputs @@ -1295,24 +1295,24 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (i is not null and j is not null) (type: boolean) - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: i (type: int), j (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int) Execution mode: llap LLAP IO: no inputs @@ -1327,14 +1327,14 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 residual filter predicates: {(_col1 <> _col3)} - Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/subquery_in.q.out ql/src/test/results/clientpositive/llap/subquery_in.q.out index 03d95004af..abeb5b41cc 100644 --- ql/src/test/results/clientpositive/llap/subquery_in.q.out +++ ql/src/test/results/clientpositive/llap/subquery_in.q.out @@ -5800,19 +5800,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: i is not null (type: boolean) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: i (type: int) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: UDFToLong(_col0) (type: bigint) sort order: + Map-reduce partition columns: UDFToLong(_col0) (type: bigint) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Execution mode: llap LLAP IO: no inputs @@ -6008,19 +6008,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (i is not null and j is not null) (type: boolean) - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: i (type: int), j (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: UDFToLong(_col0) (type: bigint), _col1 (type: int) sort order: ++ Map-reduce partition columns: UDFToLong(_col0) (type: bigint), _col1 (type: int) - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Execution mode: llap LLAP IO: no inputs @@ -6028,21 +6028,21 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tt - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: j is not null (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(i) keys: j (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 8 Basic 
stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -6056,10 +6056,10 @@ STAGE PLANS: 0 UDFToLong(_col0) (type: bigint), _col1 (type: int) 1 _col0 (type: bigint), _col1 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -6072,24 +6072,24 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: _col1 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: bigint), _col0 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: bigint), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: bigint), _col1 (type: int) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/llap/subquery_multi.q.out ql/src/test/results/clientpositive/llap/subquery_multi.q.out index d0a78a2bb4..b963791df0 100644 --- ql/src/test/results/clientpositive/llap/subquery_multi.q.out +++ ql/src/test/results/clientpositive/llap/subquery_multi.q.out @@ -3862,30 +3862,30 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tnull - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: i (type: int) outputColumnNames: i - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(), count(i) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), 
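-- The paired count()/count(i) aggregation in the tnull plan above is the
-- null-detection probe used when rewriting IN/NOT IN subqueries: count(*)
-- counts every row while count(i) skips NULLs, so count(*) > count(i)
-- tells the outer query that the subquery column contains at least one
-- NULL. Illustrative probe against the same table as in the plan:
SELECT count(*) AS all_rows, count(i) AS non_null_rows FROM tnull;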
_col1 (type: bigint) Group By Operator keys: i (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -3898,12 +3898,12 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 26 Data size: 16640 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 16510 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col5 (type: int) sort order: + Map-reduce partition columns: _col5 (type: int) - Statistics: Num rows: 26 Data size: 16640 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 16510 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: bigint), _col10 (type: bigint) Reducer 3 Execution mode: llap @@ -3915,17 +3915,17 @@ STAGE PLANS: 0 _col5 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col12 - Statistics: Num rows: 28 Data size: 18304 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 16518 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((_col0 = 3) or CASE WHEN ((_col9 = 0)) THEN (true) WHEN (_col12 is not null) THEN (false) WHEN (_col5 is null) THEN (null) WHEN ((_col10 < _col9)) THEN (null) ELSE (true) END) (type: boolean) - Statistics: Num rows: 28 Data size: 18304 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 8898 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 28 Data size: 18304 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 8666 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 28 Data size: 18304 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 8666 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3937,10 +3937,10 @@ STAGE PLANS: aggregations: count(VALUE._col0), count(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE 
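-- The CASE WHEN chain in the Filter above is the standard null-aware
-- NOT IN expansion: the predicate is true when the subquery returned no
-- rows, false when a match was found, and NULL (hence filtered out) when a
-- NULL on either side makes the comparison unknowable. Written out
-- directly (probe, probe_col, match_flag, cnt, cnt_not_null are
-- hypothetical names standing in for the _colN slots of the plan):
SELECT CASE WHEN cnt = 0 THEN true
            WHEN match_flag IS NOT NULL THEN false
            WHEN probe_col IS NULL THEN CAST(null AS boolean)
            WHEN cnt_not_null < cnt THEN CAST(null AS boolean)
            ELSE true
       END AS not_in_result
FROM probe;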
value expressions: _col0 (type: bigint), _col1 (type: bigint) Reducer 6 Execution mode: llap @@ -3949,16 +3949,16 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), true (type: boolean) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: boolean) Stage: Stage-0 diff --git ql/src/test/results/clientpositive/llap/subquery_notin.q.out ql/src/test/results/clientpositive/llap/subquery_notin.q.out index 2dd65b44be..30396b8e62 100644 --- ql/src/test/results/clientpositive/llap/subquery_notin.q.out +++ ql/src/test/results/clientpositive/llap/subquery_notin.q.out @@ -5603,14 +5603,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: c1 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Execution mode: llap LLAP IO: no inputs @@ -5618,30 +5618,30 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: c1 (type: int) outputColumnNames: c1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(), count(c1) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint) Group By Operator keys: c1 (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE 
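-- With Column stats: COMPLETE, the Data size figures above are derived from
-- fixed type widths rather than raw file sizes: roughly 4 bytes per int and
-- 8 per bigint, so the (bigint, bigint) aggregation row is sized at 16 and
-- an int key plus two bigint counts at 4 + 8 + 8 = 20, matching the
-- estimates in these hunks. The per-column metadata behind the estimates
-- can be inspected with:
DESCRIBE FORMATTED t2 c1;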
Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -5654,12 +5654,12 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 4 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 3 Execution mode: llap @@ -5671,17 +5671,17 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col4 - Statistics: Num rows: 4 Data size: 110 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((_col1 = 0) or (_col4 is null and _col0 is not null and (_col2 >= _col1))) (type: boolean) - Statistics: Num rows: 2 Data size: 55 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 55 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 55 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -5693,10 +5693,10 @@ STAGE PLANS: aggregations: count(VALUE._col0), count(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint) Reducer 6 Execution mode: llap @@ -5705,16 +5705,16 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), true (type: boolean) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: boolean) Stage: Stage-0 @@ -5761,16 +5761,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 4 Data size: 752 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 368 Basic stats: COMPLETE Column stats: 
COMPLETE Select Operator expressions: c1 (type: int), c2 (type: char(100)) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 752 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: char(100)) sort order: + Map-reduce partition columns: _col1 (type: char(100)) - Statistics: Num rows: 4 Data size: 752 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Execution mode: llap LLAP IO: no inputs @@ -5778,22 +5778,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: c1 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: UDFToDouble(_col0) (type: double) sort order: + Map-reduce partition columns: UDFToDouble(_col0) (type: double) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Reduce Output Operator key expressions: UDFToDouble(_col0) (type: double) sort order: + Map-reduce partition columns: UDFToDouble(_col0) (type: double) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Execution mode: llap LLAP IO: no inputs @@ -5801,22 +5801,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 4 Data size: 736 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 352 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: c2 (type: char(100)) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 736 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: char(100)) sort order: + Map-reduce partition columns: _col0 (type: char(100)) - Statistics: Num rows: 4 Data size: 736 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: char(100)) sort order: + Map-reduce partition columns: _col0 (type: char(100)) - Statistics: Num rows: 4 Data size: 736 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 10 @@ -5826,19 +5826,19 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: char(100)) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: 
COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: char(100)), true (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: char(100)) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: char(100)) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: boolean) Reducer 2 Execution mode: llap @@ -5850,12 +5850,12 @@ STAGE PLANS: 0 _col1 (type: char(100)) 1 _col0 (type: char(100)) outputColumnNames: _col0, _col1, _col3, _col4 - Statistics: Num rows: 4 Data size: 827 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: char(100)) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: char(100)) - Statistics: Num rows: 4 Data size: 827 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: bigint), _col4 (type: bigint) Reducer 3 Execution mode: llap @@ -5867,17 +5867,17 @@ STAGE PLANS: 0 _col0 (type: int), _col1 (type: char(100)) 1 _col0 (type: int), _col1 (type: char(100)) outputColumnNames: _col0, _col3, _col4, _col7 - Statistics: Num rows: 4 Data size: 909 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (not CASE WHEN ((_col3 = 0)) THEN (false) WHEN (_col3 is null) THEN (false) WHEN (_col7 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col4 < _col3)) THEN (true) ELSE (false) END) (type: boolean) - Statistics: Num rows: 2 Data size: 454 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 454 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 454 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -5889,12 +5889,12 @@ STAGE PLANS: keys: KEY._col0 (type: char(100)) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: UDFToDouble(_col0) (type: double) sort order: + Map-reduce partition columns: UDFToDouble(_col0) (type: double) - Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: char(100)) Reducer 6 Execution mode: llap @@ -5906,18 +5906,18 @@ STAGE PLANS: 0 UDFToDouble(_col0) (type: double) 1 
UDFToDouble(_col0) (type: double) outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(), count(_col1) keys: _col0 (type: char(100)) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: char(100)) sort order: + Map-reduce partition columns: _col0 (type: char(100)) - Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 7 Execution mode: llap @@ -5927,12 +5927,12 @@ STAGE PLANS: keys: KEY._col0 (type: char(100)) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: char(100)) sort order: + Map-reduce partition columns: _col0 (type: char(100)) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 8 Execution mode: llap @@ -5941,12 +5941,12 @@ STAGE PLANS: keys: KEY._col0 (type: char(100)) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: UDFToDouble(_col0) (type: double) sort order: + Map-reduce partition columns: UDFToDouble(_col0) (type: double) - Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: char(100)) Reducer 9 Execution mode: llap @@ -5958,17 +5958,17 @@ STAGE PLANS: 0 UDFToDouble(_col0) (type: double) 1 UDFToDouble(_col0) (type: double) outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: int), _col1 (type: char(100)) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: char(100)) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: char(100)) - Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -6059,16 +6059,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: a (type: int), b (type: int) 
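-- Every stats branch added in these golden files follows from column
-- statistics being gathered automatically during INSERT and MERGE. For a
-- single session the behavior can be toggled, which is a quick way to
-- compare plans with and without the appended compute_stats pipeline
-- (dst and src are hypothetical tables):
SET hive.stats.column.autogather=false;
EXPLAIN INSERT OVERWRITE TABLE dst SELECT * FROM src;  -- Basic Stats Work only
SET hive.stats.column.autogather=true;
EXPLAIN INSERT OVERWRITE TABLE dst SELECT * FROM src;  -- compute_stats branch appended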
outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int) Execution mode: llap LLAP IO: no inputs @@ -6076,32 +6076,32 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: b is not null (type: boolean) - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(), count(a) keys: b (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) Group By Operator keys: b (type: int), a (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -6114,12 +6114,12 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3, _col4 - Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: bigint), _col4 (type: bigint) Reducer 3 Execution mode: llap @@ -6131,17 +6131,17 @@ STAGE PLANS: 0 _col0 (type: int), _col1 (type: int) 1 _col1 (type: int), _col0 (type: int) outputColumnNames: _col0, _col1, _col3, _col4, _col7 - Statistics: Num rows: 3 Data size: 28 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (not CASE WHEN ((_col3 = 0)) THEN (false) WHEN (_col3 is null) THEN (false) WHEN (_col7 is not null) THEN (true) WHEN (_col1 is null) THEN (null) WHEN ((_col4 < _col3)) THEN (true) ELSE 
(false) END) (type: boolean) - Statistics: Num rows: 2 Data size: 18 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 18 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 18 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -6154,12 +6154,12 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 6 Execution mode: llap @@ -6168,23 +6168,23 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int), _col0 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: int), true (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int), _col0 (type: int) sort order: ++ Map-reduce partition columns: _col1 (type: int), _col0 (type: int) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: boolean) Stage: Stage-0 @@ -6275,16 +6275,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: fixob - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: i (type: int), j (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 
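The predicate that keeps reappearing in these reducers, not CASE WHEN ((_col3 = 0)) THEN (false) WHEN (_col3 is null) THEN (false) WHEN (_col7 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col4 < _col3)) THEN (true) ELSE (false) END, is the null-aware decorrelation of NOT IN: _col3 and _col4 are count(*) and count(col) over the subquery, and _col7 is a match flag produced by a left join against the subquery's distinct values. A hand-written equivalent of the shape (the query itself is assumed, not copied from the test; t and tt are the aliases these plans use):

    SELECT t.i
    FROM t
    CROSS JOIN (SELECT count(*) AS c, count(j) AS ck FROM tt) agg          -- _col3, _col4
    LEFT JOIN (SELECT DISTINCT j, true AS matched FROM tt) m ON m.j = t.i  -- _col7
    WHERE NOT CASE WHEN agg.c = 0 THEN false             -- empty subquery: keep every row
                   WHEN agg.c IS NULL THEN false         -- defensive: no count row joined
                   WHEN m.matched IS NOT NULL THEN true  -- a match exists: drop the row
                   WHEN t.i IS NULL THEN null            -- NULL probe: unknown, row dropped
                   WHEN agg.ck < agg.c THEN true         -- subquery holds NULLs: unknown, drop
                   ELSE false END;

The CASE computes whether the row must be rejected, so a row survives only when the expression is definitely false; NULL, the unknown case, fails the filter just as true does.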
2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Execution mode: llap LLAP IO: no inputs @@ -6292,49 +6292,49 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t7 - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: j is not null (type: boolean) - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(), count(i) keys: j (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) Group By Operator keys: i (type: int), j (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 7 Map Operator Tree: TableScan alias: fixob - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: j (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -6349,17 +6349,17 @@ STAGE PLANS: 1 _col0 (type: int) 2 _col3 (type: int) outputColumnNames: _col0, _col1, _col3, _col4, _col7 - Statistics: Num rows: 4 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (not CASE WHEN ((_col3 = 0)) THEN (false) WHEN (_col3 is null) THEN (false) WHEN (_col7 is not null) THEN (true) WHEN (_col1 is null) THEN (null) WHEN ((_col4 < _col3)) THEN (true) ELSE (false) END) (type: boolean) - Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 
Data size: 17 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -6372,12 +6372,12 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 5 Execution mode: llap @@ -6386,16 +6386,16 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), true (type: boolean) outputColumnNames: _col0, _col2 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: boolean) Reducer 6 Execution mode: llap @@ -6407,12 +6407,12 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col2, _col3 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: int) sort order: + Map-reduce partition columns: _col3 (type: int) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: boolean) Reducer 8 Execution mode: llap @@ -6421,12 +6421,12 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -6501,62 +6501,62 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Select Operator 
expressions: i (type: int), j (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Filter Operator predicate: j is not null (type: boolean) - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(), count(i) keys: j (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) Filter Operator predicate: j is not null (type: boolean) - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: i (type: int), j (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 6 Map Operator Tree: TableScan alias: t - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: j (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -6571,17 +6571,17 @@ STAGE PLANS: 1 _col0 (type: int) 2 _col3 (type: int) outputColumnNames: _col0, _col1, _col3, _col4, _col7 - Statistics: Num rows: 6 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (not CASE WHEN ((_col3 = 0)) THEN (false) WHEN (_col3 is null) THEN (false) WHEN (_col7 is not null) THEN (true) WHEN (_col1 is null) THEN (null) WHEN ((_col4 < _col3)) THEN 
(true) ELSE (false) END) (type: boolean) - Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -6594,12 +6594,12 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 4 Execution mode: llap @@ -6608,16 +6608,16 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), true (type: boolean) outputColumnNames: _col0, _col2 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: boolean) Reducer 5 Execution mode: llap @@ -6629,12 +6629,12 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col2, _col3 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: int) sort order: + Map-reduce partition columns: _col3 (type: int) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: boolean) Reducer 7 Execution mode: llap @@ -6643,12 +6643,12 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column 
stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -6690,45 +6690,45 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: i (type: int), j (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Filter Operator predicate: j is not null (type: boolean) - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(), count(i) keys: j (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) Filter Operator predicate: j is not null (type: boolean) - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: i (type: int), j (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -6741,12 +6741,12 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3, _col4 - Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: bigint), _col4 (type: bigint) Reducer 3 Execution mode: llap @@ -6758,17 +6758,17 @@ STAGE PLANS: 0 _col0 (type: int), _col1 (type: int) 1 _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col3, _col4, _col7 - Statistics: Num rows: 3 Data size: 28 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 
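With complete column stats the Data size figures become straightforward sums of per-column width estimates times the row estimate, so the new numbers in these hunks can be sanity-checked by hand. The widths below are inferred from the hunks themselves rather than quoted from Hive source; treat them as illustrative:

    int key alone:                     1 row  x 4            = 4    -> "Data size: 4"
    count(), count(col) payload:       1 row  x (8 + 8)      = 16   -> "Data size: 16"
    int key + two bigint counts:       1 row  x (4 + 8 + 8)  = 20   -> "Data size: 20"
    char(100) key alone:               2 rows x 88           = 176  -> "Data size: 176"
    char(100) key + two bigint counts: 1 row  x (88 + 8 + 8) = 104  -> "Data size: 104"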
3 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (not CASE WHEN ((_col3 = 0)) THEN (false) WHEN (_col3 is null) THEN (false) WHEN (_col7 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col4 < _col3)) THEN (true) ELSE (false) END) (type: boolean) - Statistics: Num rows: 2 Data size: 18 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 18 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 18 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -6781,12 +6781,12 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 5 Execution mode: llap @@ -6795,19 +6795,19 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: int), true (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: boolean) Stage: Stage-0 @@ -6849,14 +6849,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: i (type: int), j (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num 
rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int) Execution mode: llap LLAP IO: no inputs @@ -6864,30 +6864,30 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: i (type: int) outputColumnNames: i - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(), count(i) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint) Group By Operator keys: i (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -6900,12 +6900,12 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 87 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 3 Data size: 87 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: bigint), _col3 (type: bigint) Reducer 3 Execution mode: llap @@ -6917,17 +6917,17 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col5 - Statistics: Num rows: 3 Data size: 95 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((_col2 = 0) or (_col5 is null and _col1 is not null and (_col3 >= _col2))) (type: boolean) - Statistics: Num rows: 1 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ 
-6939,10 +6939,10 @@ STAGE PLANS: aggregations: count(VALUE._col0), count(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint) Reducer 6 Execution mode: llap @@ -6951,16 +6951,16 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), true (type: boolean) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: boolean) Stage: Stage-0 @@ -7004,38 +7004,38 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: i (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Select Operator expressions: i (type: int) outputColumnNames: i - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(), count(i) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint) Group By Operator keys: i (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -7048,12 +7048,12 @@ STAGE PLANS: 0 
1 outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 75 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 75 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 3 Execution mode: llap @@ -7065,17 +7065,17 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col4 - Statistics: Num rows: 3 Data size: 82 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((_col1 = 0) or (_col4 is null and _col0 is not null and (_col2 >= _col1))) (type: boolean) - Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -7087,10 +7087,10 @@ STAGE PLANS: aggregations: count(VALUE._col0), count(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint) Reducer 5 Execution mode: llap @@ -7099,16 +7099,16 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), true (type: boolean) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: boolean) Stage: Stage-0 diff --git ql/src/test/results/clientpositive/llap/subquery_scalar.q.out ql/src/test/results/clientpositive/llap/subquery_scalar.q.out index ab67a7dc59..64337ed331 100644 --- ql/src/test/results/clientpositive/llap/subquery_scalar.q.out +++ ql/src/test/results/clientpositive/llap/subquery_scalar.q.out @@ -491,17 +491,17 @@ 
STAGE PLANS: Map Operator Tree: TableScan alias: tnull - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: i (type: int) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: int) Execution mode: llap @@ -531,17 +531,17 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10 - Statistics: Num rows: 26 Data size: 16224 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 16198 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: _col10 is null (type: boolean) - Statistics: Num rows: 13 Data size: 8112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 623 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 13 Data size: 8112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 13 Data size: 8112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -583,13 +583,13 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Stage: Stage-0 @@ -4687,21 +4687,21 @@ STAGE PLANS: Map Operator Tree: TableScan alias: depts - Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 291 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: name is not null (type: boolean) - Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 291 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(deptno) keys: name (type: string) mode: hash 
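The subquery_scalar.q.out hunk above shows the estimate change most directly tied to null statistics: the 26-row join output hits predicate: _col10 is null, which the old planner costed with the generic 50% guess (13 rows) and the new planner costs from the column's recorded null count (1 row); the j is not null filters elsewhere in this patch shrink 3 -> 2 for the same reason. The backing figure is the numNulls entry in the column statistics, which can be inspected per column once stats exist (tnull and i are the alias and column from the plan above):

    DESCRIBE FORMATTED tnull i;   -- the numNulls row drives IS NULL / IS NOT NULL selectivity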
outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -4738,16 +4738,16 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: bigint), true (type: boolean), _col0 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string) sort order: + Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: boolean) Stage: Stage-0 @@ -4809,21 +4809,21 @@ STAGE PLANS: Map Operator Tree: TableScan alias: depts - Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 291 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: deptno is not null (type: boolean) - Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 291 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(name) keys: deptno (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -4858,16 +4858,16 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string), true (type: boolean), _col0 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: int) sort order: + Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 
(type: string), _col1 (type: boolean) Stage: Stage-0 @@ -4927,36 +4927,36 @@ STAGE PLANS: Map Operator Tree: TableScan alias: depts - Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 291 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: name is not null (type: boolean) - Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 291 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(deptno) keys: name (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Filter Operator predicate: deptno is not null (type: boolean) - Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 291 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(name) keys: deptno (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -5017,16 +5017,16 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: bigint), true (type: boolean), _col0 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string) sort order: + Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: boolean) Reducer 6 Execution mode: llap @@ -5036,16 +5036,16 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: bigint), true (type: boolean), _col0 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE 
Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: int) sort order: + Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: boolean) Stage: Stage-0 @@ -5110,21 +5110,21 @@ STAGE PLANS: Map Operator Tree: TableScan alias: depts - Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 291 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: name is not null (type: boolean) - Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 291 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(deptno) keys: name (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -5132,19 +5132,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: depts - Statistics: Num rows: 3 Data size: 552 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: name (type: string) outputColumnNames: name - Statistics: Num rows: 3 Data size: 552 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(name) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -5179,14 +5179,14 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 residual filter predicates: {(UDFToLong(_col0) > _col10)} - Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: int), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: date) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 704 Basic stats: 
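In the depts hunks above the same width arithmetic explains the totals: the scan of 3 rows reports Data size: 291, i.e. 3 x (roughly 93 bytes for name plus 4 for deptno); GROUP BY name with a count lands at 101 (93 + 8) and GROUP BY deptno with a count at 12 (4 + 8). The row estimate on the map-side hash Group By also drops from the input count (3) to a single group, presumably capped by the key's NDV and the partial-aggregation heuristics; the exact capping rule is not visible in this diff, so treat the "1" as the estimator's output rather than a documented formula. A hedged reproduction of the pattern (depts(deptno int, name string) as in these plans):

    EXPLAIN SELECT name, count(deptno) FROM depts GROUP BY name;
    -- the hash-mode Group By Operator now reports one estimated group, sized 101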
COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -5199,16 +5199,16 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: bigint), true (type: boolean), _col0 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string) sort order: + Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: boolean) Reducer 7 Execution mode: llap @@ -5217,10 +5217,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Stage: Stage-0 @@ -6143,16 +6143,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: i (type: int), j (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int) Execution mode: llap LLAP IO: no inputs @@ -6160,14 +6160,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tt - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: j (type: int) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Execution mode: llap LLAP IO: no inputs @@ -6175,17 +6175,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: 
i (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -6198,17 +6198,17 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col2 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: CASE WHEN (_col3 is null) THEN ((0 = 0)) ELSE ((_col2 = 0)) END (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -6224,22 +6224,22 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1 residual filter predicates: {(_col0 <> _col1)} - Statistics: Num rows: 2 Data size: 18 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int) outputColumnNames: _col1 - Statistics: Num rows: 2 Data size: 18 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col1 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 18 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 18 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 5 Execution mode: llap @@ -6249,16 +6249,16 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: bigint), true (type: boolean), _col0 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: int) sort order: + Map-reduce partition columns: _col2 
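One more rewrite worth decoding before the avg() hunks below: Reducer 2's filter predicate: CASE WHEN (_col3 is null) THEN ((0 = 0)) ELSE ((_col2 = 0)) END is the guard for the classic COUNT bug in decorrelated scalar subqueries. The subquery's count arrives through an outer join, so groups with no match surface as NULL, yet count() over an empty group must behave as 0. A hand-written sketch of the kept rows (query shape assumed; t and tt as in these plans, with _col2 and _col3 playing the roles of c and matched):

    SELECT t.i, t.j
    FROM t
    LEFT JOIN (SELECT j, count(*) AS c, true AS matched FROM tt GROUP BY j) s
           ON s.j = t.i
    WHERE CASE WHEN s.matched IS NULL THEN 0 = 0   -- no group joined: the count is implicitly 0
               ELSE s.c = 0 END;                   -- a group joined: compare its real count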
(type: int) - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: boolean) Reducer 7 Execution mode: llap @@ -6267,10 +6267,10 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Stage: Stage-0 @@ -6313,24 +6313,24 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: i (type: int), j (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int) Select Operator expressions: i (type: int), j (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int) Execution mode: llap LLAP IO: no inputs @@ -6338,17 +6338,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: i (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -6361,10 +6361,10 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -6380,22 +6380,22 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2 residual filter predicates: {(_col1 <> _col2)} - Statistics: Num rows: 2 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col2 (type: int) outputColumnNames: _col0, _col2 - Statistics: Num rows: 2 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: avg(_col0) keys: _col2 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: struct) Reducer 4 Execution mode: llap @@ -6405,19 +6405,19 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (0.0 = _col1) (type: boolean) - Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reducer 6 Execution mode: llap Reduce Operator Tree: @@ -6425,10 +6425,10 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Stage: Stage-0 @@ -6475,16 +6475,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: i (type: int), j (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + 
Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int) Execution mode: llap LLAP IO: no inputs @@ -6594,16 +6594,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: i (type: int), j (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int) Execution mode: llap LLAP IO: no inputs diff --git ql/src/test/results/clientpositive/llap/subquery_select.q.out ql/src/test/results/clientpositive/llap/subquery_select.q.out index 8c5c5addbe..7354835124 100644 --- ql/src/test/results/clientpositive/llap/subquery_select.q.out +++ ql/src/test/results/clientpositive/llap/subquery_select.q.out @@ -2180,30 +2180,30 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tnull - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: i (type: int) outputColumnNames: i - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(), count(i) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint) Group By Operator keys: i (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -2216,12 +2216,12 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 26 Data size: 650 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 520 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 26 Data size: 650 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 520 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 3 Execution mode: llap @@ -2233,14 +2233,14 @@ STAGE 
PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col4 - Statistics: Num rows: 28 Data size: 715 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), CASE WHEN ((_col1 = 0)) THEN (false) WHEN (_col4 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col2 < _col1)) THEN (null) ELSE (false) END (type: boolean) outputColumnNames: _col0, _col1 - Statistics: Num rows: 28 Data size: 715 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 28 Data size: 715 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2252,10 +2252,10 @@ STAGE PLANS: aggregations: count(VALUE._col0), count(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint) Reducer 6 Execution mode: llap @@ -2264,16 +2264,16 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), true (type: boolean) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: boolean) Stage: Stage-0 diff --git ql/src/test/results/clientpositive/llap/tez_bmj_schema_evolution.q.out ql/src/test/results/clientpositive/llap/tez_bmj_schema_evolution.q.out index c71db51f46..0f57dd5dbd 100644 --- ql/src/test/results/clientpositive/llap/tez_bmj_schema_evolution.q.out +++ ql/src/test/results/clientpositive/llap/tez_bmj_schema_evolution.q.out @@ -86,14 +86,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test - Statistics: Num rows: 1000 Data size: 231100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 950 Data size: 219545 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 950 Data size: 219545 Basic stats: COMPLETE Column stats: NONE + Statistics: Num 
rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -103,11 +103,11 @@ STAGE PLANS: outputColumnNames: _col0, _col1 input vertices: 1 Map 3 - Statistics: Num rows: 1045 Data size: 241499 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1618 Data size: 153710 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + - Statistics: Num rows: 1045 Data size: 241499 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1618 Data size: 153710 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: some inputs @@ -115,19 +115,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 500 Data size: 1904 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -136,10 +136,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1045 Data size: 241499 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1618 Data size: 153710 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1045 Data size: 241499 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1618 Data size: 153710 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/tez_dml.q.out ql/src/test/results/clientpositive/llap/tez_dml.q.out index adccaae2aa..89b74cf569 100644 --- ql/src/test/results/clientpositive/llap/tez_dml.q.out +++ ql/src/test/results/clientpositive/llap/tez_dml.q.out @@ -444,6 +444,9 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -462,8 +465,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmp_src_part + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) + outputColumnNames: c, d + Statistics: Num rows: 309 Data size: 56640 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c, 'hll') + keys: d (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 309 Data size: 56640 Basic stats: 
COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 309 Data size: 56640 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 154 Data size: 28228 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 154 Data size: 28228 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 154 Data size: 28228 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -483,6 +522,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: c + Column Types: string + Table: default.tmp_src_part PREHOOK: query: INSERT INTO TABLE tmp_src_part PARTITION (d) SELECT * FROM tmp_src PREHOOK: type: QUERY @@ -866,6 +909,10 @@ STAGE PLANS: Stage: Stage-2 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -887,6 +934,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.even + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: c, d + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(c, 'hll'), compute_stats(d, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key % 2) = 1) (type: boolean) Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE @@ -902,8 +962,51 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.odd + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: c, d + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(c, 'hll'), compute_stats(d, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: 
compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -921,6 +1024,10 @@ STAGE PLANS: Stage: Stage-4 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: c, d + Column Types: int, string + Table: default.even Stage: Stage-1 Move Operator @@ -935,6 +1042,10 @@ STAGE PLANS: Stage: Stage-5 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: c, d + Column Types: int, string + Table: default.odd PREHOOK: query: FROM src INSERT INTO TABLE even SELECT key, value WHERE key % 2 = 0 diff --git ql/src/test/results/clientpositive/llap/tez_fsstat.q.out ql/src/test/results/clientpositive/llap/tez_fsstat.q.out index d6872e4a3c..30ab50ba9b 100644 --- ql/src/test/results/clientpositive/llap/tez_fsstat.q.out +++ ql/src/test/results/clientpositive/llap/tez_fsstat.q.out @@ -81,7 +81,7 @@ Database: default Table: tab_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 4 numRows 500 rawDataSize 5312 diff --git ql/src/test/results/clientpositive/llap/tez_join_hash.q.out ql/src/test/results/clientpositive/llap/tez_join_hash.q.out index 32f8d67e87..cce289d509 100644 --- ql/src/test/results/clientpositive/llap/tez_join_hash.q.out +++ ql/src/test/results/clientpositive/llap/tez_join_hash.q.out @@ -58,19 +58,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: orc_src - Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 475 Data size: 83204 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 475 Data size: 83204 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 475 Data size: 83204 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column 
stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -82,15 +82,15 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - Statistics: Num rows: 550 Data size: 47850 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 809 Data size: 6472 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap @@ -99,10 +99,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/tez_nway_join.q.out ql/src/test/results/clientpositive/llap/tez_nway_join.q.out index 5e977c8fb4..800f043fff 100644 --- ql/src/test/results/clientpositive/llap/tez_nway_join.q.out +++ ql/src/test/results/clientpositive/llap/tez_nway_join.q.out @@ -49,14 +49,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -68,15 +68,15 @@ STAGE PLANS: input vertices: 1 Map 3 2 Map 4 - Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: all inputs @@ -84,38 +84,38 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null 
(type: boolean) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Map 4 Map Operator Tree: TableScan alias: c - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -125,10 +125,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -161,14 +161,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -178,7 +178,7 @@ STAGE PLANS: outputColumnNames: _col0 input vertices: 1 Map 3 - Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -187,15 +187,15 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 4 - Statistics: Num rows: 3 Data size: 14 Basic stats: COMPLETE 
Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: all inputs @@ -203,38 +203,38 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Map 4 Map Operator Tree: TableScan alias: c - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -244,10 +244,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -302,48 +302,48 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) 
outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Map 4 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Map 5 Map Operator Tree: TableScan alias: c - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -357,15 +357,15 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) 2 _col0 (type: int) - Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: llap @@ -374,10 +374,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -410,11 +410,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 3 
Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join 0 to 1 @@ -424,7 +424,7 @@ STAGE PLANS: outputColumnNames: _col0 input vertices: 1 Map 3 - Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join 0 to 1 @@ -433,15 +433,15 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 4 - Statistics: Num rows: 3 Data size: 14 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: all inputs @@ -449,32 +449,32 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Map 4 Map Operator Tree: TableScan alias: c - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -484,10 +484,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE 
table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/tez_self_join.q.out ql/src/test/results/clientpositive/llap/tez_self_join.q.out index c5f4612b49..71c18e0b3d 100644 --- ql/src/test/results/clientpositive/llap/tez_self_join.q.out +++ ql/src/test/results/clientpositive/llap/tez_self_join.q.out @@ -38,7 +38,7 @@ POSTHOOK: query: insert into table tez_self_join2 values(1),(2),(3) POSTHOOK: type: QUERY POSTHOOK: Output: default@tez_self_join2 POSTHOOK: Lineage: tez_self_join2.id1 EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -Warning: Shuffle Join MERGEJOIN[24][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[24][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product PREHOOK: query: explain select s.id2, s.id3 from @@ -70,60 +70,60 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (XPROD_EDGE), Map 4 (XPROD_EDGE) - Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (XPROD_EDGE), Map 5 (XPROD_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: self1 - Statistics: Num rows: 3 Data size: 1116 Basic stats: COMPLETE Column stats: NONE + alias: tez_self_join2 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((id2 = 'ab') and id1 is not null) (type: boolean) - Statistics: Num rows: 3 Data size: 1116 Basic stats: COMPLETE Column stats: NONE + predicate: id1 is not null (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: id1 (type: int), id3 (type: string) - outputColumnNames: _col0, _col2 - Statistics: Num rows: 3 Data size: 1116 Basic stats: COMPLETE Column stats: NONE + expressions: id1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - sort order: - Statistics: Num rows: 3 Data size: 1116 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string) + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 4 + Map 3 Map Operator Tree: TableScan - alias: self2 - Statistics: Num rows: 3 Data size: 552 Basic stats: COMPLETE Column stats: NONE + alias: self1 + Statistics: Num rows: 3 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ('ab' = id3) (type: boolean) - Statistics: Num rows: 3 Data size: 552 Basic stats: COMPLETE Column stats: NONE + predicate: ((id2 = 'ab') and id1 is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 3 Data size: 552 Basic stats: COMPLETE Column stats: NONE + expressions: id1 (type: int), id3 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 3 Data size: 552 Basic stats: COMPLETE Column stats: NONE + 
Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs Map 5 Map Operator Tree: TableScan - alias: tez_self_join2 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + alias: self2 + Statistics: Num rows: 3 Data size: 258 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: id1 is not null (type: boolean) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + predicate: ('ab' = id3) (type: boolean) + Statistics: Num rows: 1 Data size: 86 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: id1 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 86 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + sort order: + Statistics: Num rows: 1 Data size: 86 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -133,42 +133,42 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col2 + Statistics: Num rows: 1 Data size: 86 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'ab' (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: 0 1 outputColumnNames: _col0, _col2 - Statistics: Num rows: 9 Data size: 5013 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 90 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col2 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 9 Data size: 5013 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 90 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 9 Data size: 5013 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 90 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 9 Data size: 5514 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: 'ab' (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 9 Data size: 5514 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 9 Data size: 5514 Basic stats: 
COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -176,7 +176,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[24][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[24][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product PREHOOK: query: select s.id2, s.id3 from ( diff --git ql/src/test/results/clientpositive/llap/tez_smb_empty.q.out ql/src/test/results/clientpositive/llap/tez_smb_empty.q.out index 8ef02a186e..1851428caa 100644 --- ql/src/test/results/clientpositive/llap/tez_smb_empty.q.out +++ ql/src/test/results/clientpositive/llap/tez_smb_empty.q.out @@ -149,21 +149,21 @@ STAGE PLANS: Map Operator Tree: TableScan alias: s1 - Statistics: Num rows: 242 Data size: 3490 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 230 Data size: 3316 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 230 Data size: 3316 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 253 Data size: 3647 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 266 Data size: 1064 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -236,11 +236,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: s1 - Statistics: Num rows: 242 Data size: 90522 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 242 Data size: 67518 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string), ds (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 242 Data size: 87956 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 242 Data size: 67518 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Left Outer Join 0 to 1 @@ -248,10 +248,10 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 242 Data size: 90188 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 242 Data size: 68262 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 242 Data size: 90188 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 242 Data size: 68262 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -540,14 +540,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: s2 - Statistics: Num rows: 242 Data size: 924 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 230 Data size: 878 
Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 230 Data size: 878 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: s3 @@ -562,14 +562,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: s1 - Statistics: Num rows: 242 Data size: 924 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 230 Data size: 878 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 230 Data size: 878 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Left Outer Join 0 to 1 @@ -578,7 +578,7 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) 2 _col0 (type: int) - Statistics: Num rows: 506 Data size: 1931 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 532 Data size: 2129 Basic stats: PARTIAL Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -657,25 +657,25 @@ STAGE PLANS: Map Operator Tree: TableScan alias: s3 - Statistics: Num rows: 242 Data size: 924 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 230 Data size: 878 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 230 Data size: 878 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: s1 - Statistics: Num rows: 242 Data size: 924 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 230 Data size: 878 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 230 Data size: 878 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Left Outer Join 0 to 1 @@ -684,7 +684,7 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) 2 _col0 (type: int) - Statistics: Num rows: 506 Data size: 1931 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 532 Data size: 2129 Basic stats: PARTIAL Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -841,14 +841,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: s3 - Statistics: Num rows: 242 Data size: 3490 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is 
not null (type: boolean) - Statistics: Num rows: 230 Data size: 3316 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 230 Data size: 3316 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: s1 @@ -866,7 +866,7 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 253 Data size: 3647 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 266 Data size: 1064 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash diff --git ql/src/test/results/clientpositive/llap/tez_union_dynamic_partition.q.out ql/src/test/results/clientpositive/llap/tez_union_dynamic_partition.q.out index 5dfdede518..94f5cb4a43 100644 --- ql/src/test/results/clientpositive/llap/tez_union_dynamic_partition.q.out +++ ql/src/test/results/clientpositive/llap/tez_union_dynamic_partition.q.out @@ -54,7 +54,8 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Map 1 <- Union 2 (CONTAINS) - Map 3 <- Union 2 (CONTAINS) + Map 4 <- Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -78,9 +79,25 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.partunion1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: id1, part1 + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(id1, 'hll') + keys: part1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 512 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 512 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct) Execution mode: llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan alias: dummy @@ -101,8 +118,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.partunion1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: id1, part1 + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(id1, 'hll') + keys: part1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 512 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 512 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + 
+             Select Operator
+               expressions: _col1 (type: struct), _col0 (type: string)
+               outputColumnNames: _col0, _col1
+               Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE
+               File Output Operator
+                 compressed: false
+                 Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE
+                 table:
+                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
        Union 2
            Vertex: Union 2
@@ -124,6 +177,10 @@ STAGE PLANS:
  Stage: Stage-3
    Stats Work
      Basic Stats Work:
+     Column Stats Desc:
+         Columns: id1
+         Column Types: int
+         Table: default.partunion1
PREHOOK: query: insert into table partunion1 partition(part1)
select temps.* from (
diff --git ql/src/test/results/clientpositive/llap/tez_union_dynamic_partition_2.q.out ql/src/test/results/clientpositive/llap/tez_union_dynamic_partition_2.q.out
index e7599a45f1..6eda134dea 100644
--- ql/src/test/results/clientpositive/llap/tez_union_dynamic_partition_2.q.out
+++ ql/src/test/results/clientpositive/llap/tez_union_dynamic_partition_2.q.out
@@ -67,7 +67,8 @@ STAGE PLANS:
#### A masked pattern was here ####
      Edges:
        Map 1 <- Union 2 (CONTAINS)
-       Map 3 <- Union 2 (CONTAINS)
+       Map 4 <- Union 2 (CONTAINS)
+       Reducer 3 <- Union 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
      Vertices:
        Map 1
@@ -91,9 +92,25 @@ STAGE PLANS:
                      output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
                      serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
                      name: default.partunion1
+               Select Operator
+                 expressions: _col0 (type: int), _col1 (type: string)
+                 outputColumnNames: id1, part1
+                 Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+                 Group By Operator
+                   aggregations: compute_stats(id1, 'hll')
+                   keys: part1 (type: string)
+                   mode: hash
+                   outputColumnNames: _col0, _col1
+                   Statistics: Num rows: 1 Data size: 512 Basic stats: COMPLETE Column stats: COMPLETE
+                   Reduce Output Operator
+                     key expressions: _col0 (type: string)
+                     sort order: +
+                     Map-reduce partition columns: _col0 (type: string)
+                     Statistics: Num rows: 1 Data size: 512 Basic stats: COMPLETE Column stats: COMPLETE
+                     value expressions: _col1 (type: struct)
            Execution mode: llap
            LLAP IO: no inputs
-       Map 3
+       Map 4
            Map Operator Tree:
                TableScan
                  alias: dummy
@@ -114,8 +131,44 @@ STAGE PLANS:
                      output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
                      serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
                      name: default.partunion1
+               Select Operator
+                 expressions: _col0 (type: int), _col1 (type: string)
+                 outputColumnNames: id1, part1
+                 Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+                 Group By Operator
+                   aggregations: compute_stats(id1, 'hll')
+                   keys: part1 (type: string)
+                   mode: hash
+                   outputColumnNames: _col0, _col1
+                   Statistics: Num rows: 1 Data size: 512 Basic stats: COMPLETE Column stats: COMPLETE
+                   Reduce Output Operator
+                     key expressions: _col0 (type: string)
+                     sort order: +
+                     Map-reduce partition columns: _col0 (type: string)
+                     Statistics: Num rows: 1 Data size: 512 Basic stats: COMPLETE Column stats: COMPLETE
+                     value expressions: _col1 (type: struct)
            Execution mode: llap
            LLAP IO: no inputs
+       Reducer 3
+           Execution mode: llap
+           Reduce Operator Tree:
+             Group By Operator
+               aggregations: compute_stats(VALUE._col0)
+               keys: KEY._col0 (type: string)
+               mode: mergepartial
+               outputColumnNames: _col0, _col1
+               Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE
+             Select Operator
+               expressions: _col1 (type: struct), _col0 (type: string)
+               outputColumnNames: _col0, _col1
+               Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE
+               File Output Operator
+                 compressed: false
+                 Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE
+                 table:
+                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
        Union 2
            Vertex: Union 2
@@ -146,6 +199,10 @@ STAGE PLANS:
  Stage: Stage-3
    Stats Work
      Basic Stats Work:
+     Column Stats Desc:
+         Columns: id1
+         Column Types: int
+         Table: default.partunion1
  Stage: Stage-4
    Tez
diff --git ql/src/test/results/clientpositive/llap/tez_union_multiinsert.q.out ql/src/test/results/clientpositive/llap/tez_union_multiinsert.q.out
index f9535c50bb..16f10e7495 100644
--- ql/src/test/results/clientpositive/llap/tez_union_multiinsert.q.out
+++ ql/src/test/results/clientpositive/llap/tez_union_multiinsert.q.out
@@ -53,11 +53,13 @@ STAGE PLANS:
    Tez
#### A masked pattern was here ####
      Edges:
-       Map 6 <- Union 3 (CONTAINS)
-       Map 7 <- Union 3 (CONTAINS)
+       Map 8 <- Union 3 (CONTAINS)
+       Map 9 <- Union 3 (CONTAINS)
        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS)
        Reducer 4 <- Union 3 (SIMPLE_EDGE)
-       Reducer 5 <- Union 3 (SIMPLE_EDGE)
+       Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE)
+       Reducer 6 <- Union 3 (SIMPLE_EDGE)
+       Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
      Vertices:
        Map 1
@@ -78,7 +80,7 @@ STAGE PLANS:
                        value expressions: _col0 (type: bigint)
            Execution mode: llap
            LLAP IO: no inputs
-       Map 6
+       Map 8
            Map Operator Tree:
                TableScan
                  alias: s2
@@ -111,7 +113,7 @@ STAGE PLANS:
                        Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
-       Map 7
+       Map 9
            Map Operator Tree:
                TableScan
                  alias: s0
@@ -199,10 +201,38 @@ STAGE PLANS:
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                      name: default.dest1
+               Select Operator
+                 expressions: _col0 (type: string), _col1 (type: string)
+                 outputColumnNames: key, value
+                 Statistics: Num rows: 309 Data size: 84048 Basic stats: COMPLETE Column stats: COMPLETE
+                 Group By Operator
+                   aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                   mode: hash
+                   outputColumnNames: _col0, _col1
+                   Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+                   Reduce Output Operator
+                     sort order:
+                     Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+                     value expressions: _col0 (type: struct), _col1 (type: struct)
        Reducer 5
            Execution mode: llap
            Reduce Operator Tree:
              Group By Operator
+               aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+               mode: mergepartial
+               outputColumnNames: _col0, _col1
+               Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+               File Output Operator
+                 compressed: false
+                 Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+                 table:
+                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+       Reducer 6
+           Execution mode: llap
+           Reduce Operator Tree:
+             Group By Operator
                aggregations: count(DISTINCT KEY._col2:0._col0)
                keys: KEY._col0 (type: string), KEY._col1 (type: string)
                mode: mergepartial
@@ -220,6 +250,34 @@ STAGE PLANS:
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                      name: default.dest2
+               Select Operator
+                 expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+                 outputColumnNames: key, val1, val2
+                 Statistics: Num rows: 1001 Data size: 456456 Basic stats: COMPLETE Column stats: COMPLETE
+                 Group By Operator
+                   aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll')
+                   mode: hash
+                   outputColumnNames: _col0, _col1, _col2
+                   Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE
+                   Reduce Output Operator
+                     sort order:
+                     Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE
+                     value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+       Reducer 7
+           Execution mode: llap
+           Reduce Operator Tree:
+             Group By Operator
+               aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+               mode: mergepartial
+               outputColumnNames: _col0, _col1, _col2
+               Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE
+               File Output Operator
+                 compressed: false
+                 Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE
+                 table:
+                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
        Union 3
            Vertex: Union 3
@@ -239,6 +297,10 @@ STAGE PLANS:
  Stage: Stage-4
    Stats Work
      Basic Stats Work:
+     Column Stats Desc:
+         Columns: key, value
+         Column Types: string, string
+         Table: default.dest1
  Stage: Stage-1
    Move Operator
@@ -253,6 +315,10 @@ STAGE PLANS:
  Stage: Stage-5
    Stats Work
      Basic Stats Work:
+     Column Stats Desc:
+         Columns: key, val1, val2
+         Column Types: string, string, string
+         Table: default.dest2
PREHOOK: query: FROM (
select key, value from (
@@ -963,10 +1029,12 @@ STAGE PLANS:
#### A masked pattern was here ####
      Edges:
        Map 1 <- Union 2 (CONTAINS)
-       Map 7 <- Union 2 (CONTAINS)
+       Map 9 <- Union 2 (CONTAINS)
        Reducer 3 <- Union 2 (SIMPLE_EDGE)
-       Reducer 4 <- Union 2 (SIMPLE_EDGE)
-       Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE), Union 2 (CONTAINS)
+       Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
+       Reducer 5 <- Union 2 (SIMPLE_EDGE)
+       Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE)
+       Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE), Union 2 (CONTAINS)
#### A masked pattern was here ####
      Vertices:
        Map 1
@@ -1002,7 +1070,7 @@ STAGE PLANS:
                        Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
-       Map 5
+       Map 7
            Map Operator Tree:
                TableScan
                  alias: s1
@@ -1020,7 +1088,7 @@ STAGE PLANS:
                        value expressions: _col0 (type: bigint)
            Execution mode: llap
            LLAP IO: no inputs
-       Map 7
+       Map 9
            Map Operator Tree:
                TableScan
                  alias: s2
@@ -1074,10 +1142,38 @@ STAGE PLANS:
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                      name: default.dest1
+               Select Operator
+                 expressions: _col0 (type: string), _col1 (type: string)
+                 outputColumnNames: key, value
+                 Statistics: Num rows: 309 Data size: 84048 Basic stats: COMPLETE Column stats: COMPLETE
+                 Group By Operator
+                   aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                   mode: hash
+                   outputColumnNames: _col0, _col1
+                   Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+                   Reduce Output Operator
+                     sort order:
+                     Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+                     value expressions: _col0 (type: struct), _col1 (type: struct)
        Reducer 4
            Execution mode: llap
            Reduce Operator Tree:
              Group By Operator
+               aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+               mode: mergepartial
+               outputColumnNames: _col0, _col1
+               Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+               File Output Operator
+                 compressed: false
+                 Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+                 table:
+                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+       Reducer 5
+           Execution mode: llap
+           Reduce Operator Tree:
+             Group By Operator
                aggregations: count(DISTINCT KEY._col2:0._col0)
                keys: KEY._col0 (type: string), KEY._col1 (type: string)
                mode: mergepartial
@@ -1095,10 +1191,38 @@ STAGE PLANS:
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                      name: default.dest2
+               Select Operator
+                 expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+                 outputColumnNames: key, val1, val2
+                 Statistics: Num rows: 1001 Data size: 456456 Basic stats: COMPLETE Column stats: COMPLETE
+                 Group By Operator
+                   aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll')
+                   mode: hash
+                   outputColumnNames: _col0, _col1, _col2
+                   Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE
+                   Reduce Output Operator
+                     sort order:
+                     Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE
+                     value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
        Reducer 6
            Execution mode: llap
            Reduce Operator Tree:
              Group By Operator
+               aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+               mode: mergepartial
+               outputColumnNames: _col0, _col1, _col2
+               Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE
+               File Output Operator
+                 compressed: false
+                 Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE
+                 table:
+                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+       Reducer 8
+           Execution mode: llap
+           Reduce Operator Tree:
+             Group By Operator
                aggregations: count(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
@@ -1148,6 +1272,10 @@ STAGE PLANS:
  Stage: Stage-4
    Stats Work
      Basic Stats Work:
+     Column Stats Desc:
+         Columns: key, value
+         Column Types: string, string
+         Table: default.dest1
  Stage: Stage-1
    Move Operator
@@ -1162,6 +1290,10 @@ STAGE PLANS:
  Stage: Stage-5
    Stats Work
      Basic Stats Work:
+     Column Stats Desc:
+         Columns: key, val1, val2
+         Column Types: string, string, string
+         Table: default.dest2
PREHOOK: query: FROM (
select key, value from src s0
@@ -1868,10 +2000,12 @@ STAGE PLANS:
#### A masked pattern was here ####
      Edges:
        Map 1 <- Union 2 (CONTAINS)
-       Map 7 <- Union 2 (CONTAINS)
+       Map 9 <- Union 2 (CONTAINS)
        Reducer 3 <- Union 2 (SIMPLE_EDGE)
-       Reducer 4 <- Union 2 (SIMPLE_EDGE)
-       Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE), Union 2 (CONTAINS)
+       Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
+       Reducer 5 <- Union 2 (SIMPLE_EDGE)
+       Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE)
+       Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE), Union 2 (CONTAINS)
#### A masked pattern was here ####
      Vertices:
        Map 1
@@ -1907,7 +2041,7 @@ STAGE PLANS:
                        Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
-       Map 5
+       Map 7
            Map Operator Tree:
                TableScan
                  alias: s1
@@ -1925,7 +2059,7 @@ STAGE PLANS:
                        value expressions: _col0 (type: bigint)
            Execution mode: llap
            LLAP IO: no inputs
-       Map 7
+       Map 9
            Map Operator Tree:
                TableScan
                  alias: s2
@@ -1979,10 +2113,38 @@ STAGE PLANS:
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                      name: default.dest1
+               Select Operator
+                 expressions: _col0 (type: string), _col1 (type: string)
+                 outputColumnNames: key, value
+                 Statistics: Num rows: 309 Data size: 84048 Basic stats: COMPLETE Column stats: COMPLETE
+                 Group By Operator
+                   aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                   mode: hash
+                   outputColumnNames: _col0, _col1
+                   Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+                   Reduce Output Operator
+                     sort order:
+                     Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+                     value expressions: _col0 (type: struct), _col1 (type: struct)
        Reducer 4
            Execution mode: llap
            Reduce Operator Tree:
              Group By Operator
+               aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+               mode: mergepartial
+               outputColumnNames: _col0, _col1
+               Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+               File Output Operator
+                 compressed: false
+                 Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+                 table:
+                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+       Reducer 5
+           Execution mode: llap
+           Reduce Operator Tree:
+             Group By Operator
                aggregations: count(DISTINCT KEY._col2:0._col0)
                keys: KEY._col0 (type: string), KEY._col1 (type: string)
                mode: mergepartial
@@ -2000,10 +2162,38 @@ STAGE PLANS:
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                      name: default.dest2
+               Select Operator
+                 expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+                 outputColumnNames: key, val1, val2
+                 Statistics: Num rows: 1001 Data size: 456456 Basic stats: COMPLETE Column stats: COMPLETE
+                 Group By Operator
+                   aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll')
+                   mode: hash
+                   outputColumnNames: _col0, _col1, _col2
+                   Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE
+                   Reduce Output Operator
+                     sort order:
+                     Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE
+                     value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
        Reducer 6
            Execution mode: llap
            Reduce Operator Tree:
              Group By Operator
+               aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+               mode: mergepartial
+               outputColumnNames: _col0, _col1, _col2
+               Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE
+               File Output Operator
+                 compressed: false
+                 Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE
+                 table:
+                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+       Reducer 8
+           Execution mode: llap
+           Reduce Operator Tree:
+             Group By Operator
                aggregations: count(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
@@ -2053,6 +2243,10 @@ STAGE PLANS:
  Stage: Stage-4
    Stats Work
      Basic Stats Work:
+     Column Stats Desc:
+         Columns: key, value
+         Column Types: string, string
+         Table: default.dest1
  Stage: Stage-1
    Move Operator
@@ -2067,6 +2261,10 @@ STAGE PLANS:
  Stage: Stage-5
    Stats Work
      Basic Stats Work:
+     Column Stats Desc:
+         Columns: key, val1, val2
+         Column Types: string, string, string
+         Table: default.dest2
PREHOOK: query: FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1
UNION all
@@ -2764,10 +2962,12 @@ STAGE PLANS:
    Tez
#### A masked pattern was here ####
      Edges:
-       Map 6 <- Union 3 (CONTAINS)
+       Map 8 <- Union 3 (CONTAINS)
        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS)
        Reducer 4 <- Union 3 (SIMPLE_EDGE)
-       Reducer 5 <- Union 3 (SIMPLE_EDGE)
+       Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE)
+       Reducer 6 <- Union 3 (SIMPLE_EDGE)
+       Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
      Vertices:
        Map 1
@@ -2788,7 +2988,7 @@ STAGE PLANS:
                        value expressions: _col0 (type: bigint)
            Execution mode: llap
            LLAP IO: no inputs
-       Map 6
+       Map 8
            Map Operator Tree:
                TableScan
                  alias: s2
@@ -2876,10 +3076,38 @@ STAGE PLANS:
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                      name: default.dest1
+               Select Operator
+                 expressions: _col0 (type: string), _col1 (type: string)
+                 outputColumnNames: key, value
+                 Statistics: Num rows: 250 Data size: 68000 Basic stats: COMPLETE Column stats: COMPLETE
+                 Group By Operator
+                   aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                   mode: hash
+                   outputColumnNames: _col0, _col1
+                   Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+                   Reduce Output Operator
+                     sort order:
+                     Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+                     value expressions: _col0 (type: struct), _col1 (type: struct)
        Reducer 5
            Execution mode: llap
            Reduce Operator Tree:
              Group By Operator
+               aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+               mode: mergepartial
+               outputColumnNames: _col0, _col1
+               Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+               File Output Operator
+                 compressed: false
+                 Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+                 table:
+                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+       Reducer 6
+           Execution mode: llap
+           Reduce Operator Tree:
+             Group By Operator
                aggregations: count(DISTINCT KEY._col2:0._col0)
                keys: KEY._col0 (type: string), KEY._col1 (type: string)
                mode: mergepartial
@@ -2897,6 +3125,34 @@ STAGE PLANS:
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                      name: default.dest2
+               Select Operator
+                 expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+                 outputColumnNames: key, val1, val2
+                 Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: COMPLETE
+                 Group By Operator
+                   aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll')
+                   mode: hash
+                   outputColumnNames: _col0, _col1, _col2
+                   Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE
+                   Reduce Output Operator
+                     sort order:
+                     Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE
+                     value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+       Reducer 7
+           Execution mode: llap
+           Reduce Operator Tree:
+             Group By Operator
+               aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+               mode: mergepartial
+               outputColumnNames: _col0, _col1, _col2
+               Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE
+               File Output Operator
+                 compressed: false
+                 Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE
+                 table:
+                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
        Union 3
            Vertex: Union 3
@@ -2916,6 +3172,10 @@ STAGE PLANS:
  Stage: Stage-4
    Stats Work
      Basic Stats Work:
+     Column Stats Desc:
+         Columns: key, value
+         Column Types: string, string
+         Table: default.dest1
  Stage: Stage-1
    Move Operator
@@ -2930,6 +3190,10 @@ STAGE PLANS:
  Stage: Stage-5
    Stats Work
      Basic Stats Work:
+     Column Stats Desc:
+         Columns: key, val1, val2
+         Column Types: string, string, string
+         Table: default.dest2
PREHOOK: query: FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1
UNION all
@@ -3621,10 +3885,12 @@ STAGE PLANS:
    Tez
#### A masked pattern was here ####
      Edges:
-       Map 6 <- Union 3 (CONTAINS)
+       Map 8 <- Union 3 (CONTAINS)
        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS)
        Reducer 4 <- Union 3 (SIMPLE_EDGE)
        Reducer 5 <- Reducer 4 (SIMPLE_EDGE)
+       Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE)
+       Reducer 7 <- Reducer 4 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
      Vertices:
        Map 1
@@ -3645,7 +3911,7 @@ STAGE PLANS:
                        value expressions: _col0 (type: bigint)
            Execution mode: llap
            LLAP IO: no inputs
-       Map 6
+       Map 8
            Map Operator Tree:
                TableScan
                  alias: s2
@@ -3725,6 +3991,19 @@ STAGE PLANS:
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                      name: default.dest2
+               Select Operator
+                 expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+                 outputColumnNames: key, val1, val2
+                 Statistics: Num rows: 250 Data size: 114000 Basic stats: COMPLETE Column stats: COMPLETE
+                 Group By Operator
+                   aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll')
+                   mode: hash
+                   outputColumnNames: _col0, _col1, _col2
+                   Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE
+                   Reduce Output Operator
+                     sort order:
+                     Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE
+                     value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
        Reducer 5
            Execution mode: llap
            Reduce Operator Tree:
@@ -3746,6 +4025,49 @@ STAGE PLANS:
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                      name: default.dest1
+               Select Operator
+                 expressions: _col0 (type: string), _col1 (type: string)
+                 outputColumnNames: key, value
+                 Statistics: Num rows: 125 Data size: 34000 Basic stats: COMPLETE Column stats: COMPLETE
+                 Group By Operator
+                   aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                   mode: hash
+                   outputColumnNames: _col0, _col1
+                   Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+                   Reduce Output Operator
+                     sort order:
+                     Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+                     value expressions: _col0 (type: struct), _col1 (type: struct)
+       Reducer 6
+           Execution mode: llap
+           Reduce Operator Tree:
+             Group By Operator
+               aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+               mode: mergepartial
+               outputColumnNames: _col0, _col1
+               Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+               File Output Operator
+                 compressed: false
+                 Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+                 table:
+                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+       Reducer 7
+           Execution mode: llap
+           Reduce Operator Tree:
+             Group By Operator
+               aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+               mode: mergepartial
+               outputColumnNames: _col0, _col1, _col2
+               Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE
+               File Output Operator
+                 compressed: false
+                 Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE
+                 table:
+                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
        Union 3
            Vertex: Union 3
@@ -3765,6 +4087,10 @@ STAGE PLANS:
  Stage: Stage-4
    Stats Work
      Basic Stats Work:
+     Column Stats Desc:
+         Columns: key, value
+         Column Types: string, string
+         Table: default.dest1
  Stage: Stage-1
    Move Operator
@@ -3779,6 +4105,10 @@ STAGE PLANS:
  Stage: Stage-5
    Stats Work
      Basic Stats Work:
+     Column Stats Desc:
+         Columns: key, val1, val2
+         Column Types: string, string, string
+         Table: default.dest2
PREHOOK: query: FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1
UNION distinct
diff --git ql/src/test/results/clientpositive/llap/union4.q.out ql/src/test/results/clientpositive/llap/union4.q.out
index 2716072320..747c280c07 100644
--- ql/src/test/results/clientpositive/llap/union4.q.out
+++ ql/src/test/results/clientpositive/llap/union4.q.out
@@ -30,7 +30,8 @@ STAGE PLANS:
#### A masked pattern was here ####
      Edges:
        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS)
-       Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS)
+       Reducer 4 <- Union 3 (CUSTOM_SIMPLE_EDGE)
+       Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS)
#### A masked pattern was here ####
      Vertices:
        Map 1
@@ -51,7 +52,7 @@ STAGE PLANS:
                        value expressions: _col0 (type: bigint)
            Execution mode: llap
            LLAP IO: no inputs
-       Map 4
+       Map 5
            Map Operator Tree:
                TableScan
                  alias: s2
@@ -93,7 +94,35 @@ STAGE PLANS:
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                      name: default.tmptable
-       Reducer 5
+               Select Operator
+                 expressions: _col0 (type: string), _col1 (type: int)
+                 outputColumnNames: key, value
+                 Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+                 Group By Operator
+                   aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                   mode: hash
+                   outputColumnNames: _col0, _col1
+                   Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE
+                   Reduce Output Operator
+                     sort order:
+                     Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE
+                     value expressions: _col0 (type: struct), _col1 (type: struct)
+       Reducer 4
+           Execution mode: llap
+           Reduce Operator Tree:
+             Group By Operator
+               aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+               mode: mergepartial
+               outputColumnNames: _col0, _col1
+               Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+               File Output Operator
+                 compressed: false
+                 Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+                 table:
+                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+       Reducer 6
            Execution mode: llap
            Reduce Operator Tree:
              Group By Operator
@@ -117,6 +146,19 @@ STAGE PLANS:
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                      name: default.tmptable
+               Select Operator
+                 expressions: _col0 (type: string), _col1 (type: int)
+                 outputColumnNames: key, value
+                 Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+                 Group By Operator
+                   aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                   mode: hash
+                   outputColumnNames: _col0, _col1
+                   Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE
+                   Reduce Output Operator
+                     sort order:
+                     Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE
+                     value expressions: _col0 (type: struct), _col1 (type: struct)
        Union 3
            Vertex: Union 3
@@ -136,6 +178,10 @@ STAGE PLANS:
  Stage: Stage-3
    Stats Work
      Basic Stats Work:
+     Column Stats Desc:
+         Columns: key, value
+         Column Types: string, int
+         Table: default.tmptable
PREHOOK: query: insert overwrite table tmptable
select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src s1
diff --git ql/src/test/results/clientpositive/llap/union6.q.out ql/src/test/results/clientpositive/llap/union6.q.out
index b001433938..068ec75719 100644
--- ql/src/test/results/clientpositive/llap/union6.q.out
+++ ql/src/test/results/clientpositive/llap/union6.q.out
@@ -29,8 +29,9 @@ STAGE PLANS:
    Tez
#### A masked pattern was here ####
      Edges:
-       Map 4 <- Union 3 (CONTAINS)
+       Map 5 <- Union 3 (CONTAINS)
        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS)
+       Reducer 4 <- Union 3 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
      Vertices:
        Map 1
@@ -51,7 +52,7 @@ STAGE PLANS:
                        value expressions: _col0 (type: bigint)
            Execution mode: llap
            LLAP IO: no inputs
-       Map 4
+       Map 5
            Map Operator Tree:
                TableScan
                  alias: s2
@@ -68,6 +69,19 @@ STAGE PLANS:
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                      name: default.tmptable
+               Select Operator
+                 expressions: _col0 (type: string), _col1 (type: string)
+                 outputColumnNames: key, value
+                 Statistics: Num rows: 26 Data size: 7072 Basic stats: COMPLETE Column stats: COMPLETE
+                 Group By Operator
+                   aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                   mode: hash
+                   outputColumnNames: _col0, _col1
+                   Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+                   Reduce Output Operator
+                     sort order:
+                     Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+                     value expressions: _col0 (type: struct), _col1 (type: struct)
            Execution mode: llap
            LLAP IO: no inputs
        Reducer 2
@@ -90,6 +104,34 @@ STAGE PLANS:
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                      name: default.tmptable
+               Select Operator
+                 expressions: _col0 (type: string), _col1 (type: string)
+                 outputColumnNames: key, value
+                 Statistics: Num rows: 26 Data size: 7072 Basic stats: COMPLETE Column stats: COMPLETE
+                 Group By Operator
+                   aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                   mode: hash
+                   outputColumnNames: _col0, _col1
+                   Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+                   Reduce Output Operator
+                     sort order:
+                     Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+                     value expressions: _col0 (type: struct), _col1 (type: struct)
+       Reducer 4
+           Execution mode: llap
+           Reduce Operator Tree:
+             Group By Operator
+               aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+               mode: mergepartial
+               outputColumnNames: _col0, _col1
+               Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+               File Output Operator
+                 compressed: false
+                 Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+                 table:
+                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
        Union 3
            Vertex: Union 3
@@ -109,6 +151,10 @@ STAGE PLANS:
  Stage: Stage-3
    Stats Work
      Basic Stats Work:
+     Column Stats Desc:
+         Columns: key, value
+         Column Types: string, string
+         Table: default.tmptable
PREHOOK: query: insert overwrite table tmptable
select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1
diff --git ql/src/test/results/clientpositive/llap/union_stats.q.out ql/src/test/results/clientpositive/llap/union_stats.q.out
index a492757bc1..ba2700c80d 100644
--- ql/src/test/results/clientpositive/llap/union_stats.q.out
+++ ql/src/test/results/clientpositive/llap/union_stats.q.out
@@ -422,7 +422,7 @@ Retention: 0
#### A masked pattern was here ####
Table Type:          MANAGED_TABLE
Table Parameters:
-       COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\"}
+       COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
        numFiles                2
        numRows                 1000
        rawDataSize             10624
@@ -456,7 +456,7 @@ Retention: 0
#### A masked pattern was here ####
Table Type:          MANAGED_TABLE
Table Parameters:
-       COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\"}
+       COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
        numFiles                2
        numRows                 1000
        rawDataSize             10624
diff --git ql/src/test/results/clientpositive/llap/union_top_level.q.out ql/src/test/results/clientpositive/llap/union_top_level.q.out
index a010232171..31b17553a0 100644
--- ql/src/test/results/clientpositive/llap/union_top_level.q.out
+++ ql/src/test/results/clientpositive/llap/union_top_level.q.out
@@ -644,8 +644,9 @@ STAGE PLANS:
#### A masked pattern was here ####
      Edges:
        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS)
-       Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS)
-       Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS)
+       Reducer 4 <- Union 3 (CUSTOM_SIMPLE_EDGE)
+       Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS)
+       Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS)
#### A masked pattern was here ####
      Vertices:
        Map 1
@@ -666,11 +667,10 @@ STAGE PLANS:
                      Reduce Output Operator
                        sort order:
                        Statistics: Num rows: 3 Data size: 261 Basic stats: COMPLETE Column stats: COMPLETE
-                       TopN Hash Memory Usage: 0.1
                        value expressions: _col0 (type: string)
            Execution mode: llap
            LLAP IO: no inputs
-       Map 4
+       Map 5
            Map Operator Tree:
                TableScan
                  alias: src
@@ -688,11 +688,10 @@ STAGE PLANS:
                      Reduce Output Operator
                        sort order:
                        Statistics: Num rows: 3 Data size: 261 Basic stats: COMPLETE Column stats: COMPLETE
-                       TopN Hash Memory Usage: 0.1
                        value expressions: _col0 (type: string)
            Execution mode: llap
            LLAP IO: no inputs
-       Map 6
+       Map 7
            Map Operator Tree:
                TableScan
                  alias: src
@@ -710,7 +709,6 @@ STAGE PLANS:
                      Reduce Output Operator
                        sort order:
                        Statistics: Num rows: 3 Data size: 261 Basic stats: COMPLETE Column stats: COMPLETE
-                       TopN Hash Memory Usage: 0.1
                        value expressions: _col0 (type: string)
            Execution mode: llap
            LLAP IO: no inputs
@@ -736,7 +734,35 @@ STAGE PLANS:
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                      name: default.union_top
-       Reducer 5
+               Select Operator
+                 expressions: _col0 (type: string), _col1 (type: int)
+                 outputColumnNames: key, value
+                 Statistics: Num rows: 9 Data size: 819 Basic stats: COMPLETE Column stats: COMPLETE
+                 Group By Operator
+                   aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                   mode: hash
+                   outputColumnNames: _col0, _col1
+                   Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE
+                   Reduce Output Operator
+                     sort order:
+                     Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE
+                     value expressions: _col0 (type: struct), _col1 (type: struct)
+       Reducer 4
+           Execution mode: llap
+           Reduce Operator Tree:
+             Group By Operator
+               aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+               mode: mergepartial
+               outputColumnNames: _col0, _col1
+               Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+               File Output Operator
+                 compressed: false
+                 Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+                 table:
+                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+       Reducer 6
            Execution mode: llap
            Reduce Operator Tree:
              Select Operator
@@ -758,7 +784,20 @@ STAGE PLANS:
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                      name: default.union_top
-       Reducer 7
+               Select Operator
+                 expressions: _col0 (type: string), _col1 (type: int)
+                 outputColumnNames: key, value
+                 Statistics: Num rows: 9 Data size: 819 Basic stats: COMPLETE Column stats: COMPLETE
+                 Group By Operator
+                   aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                   mode: hash
+                   outputColumnNames: _col0, _col1
+                   Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE
+                   Reduce Output Operator
+                     sort order:
+                     Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE
+                     value expressions: _col0 (type: struct), _col1 (type: struct)
+       Reducer 8
            Execution mode: llap
            Reduce Operator Tree:
              Select Operator
@@ -780,6 +819,19 @@ STAGE PLANS:
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                      name: default.union_top
+               Select Operator
+                 expressions: _col0 (type: string), _col1 (type: int)
+                 outputColumnNames: key, value
+                 Statistics: Num rows: 9 Data size: 819 Basic stats: COMPLETE Column stats: COMPLETE
+                 Group By Operator
+                   aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                   mode: hash
+                   outputColumnNames: _col0, _col1
+                   Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE
+                   Reduce Output Operator
+                     sort order:
+                     Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE
+                     value expressions: _col0 (type: struct), _col1 (type: struct)
        Union 3
            Vertex: Union 3
@@ -799,6 +851,10 @@ STAGE PLANS:
  Stage: Stage-3
    Stats Work
      Basic Stats Work:
+     Column Stats Desc:
+         Columns: key, value
+         Column Types: string, int
+         Table: default.union_top
PREHOOK: query: insert into table union_top
select * from (select key, 0 as value from src where key % 3 == 0 limit 3)a
@@ -865,8 +921,9 @@ STAGE PLANS:
#### A masked pattern was here ####
      Edges:
        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS)
-       Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS)
-       Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS)
+       Reducer 4 <- Union 3 (CUSTOM_SIMPLE_EDGE)
+       Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS)
+       Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS)
#### A masked pattern was here ####
      Vertices:
        Map 1
@@ -887,11 +944,10 @@ STAGE PLANS:
                      Reduce Output Operator
                        sort order:
                        Statistics: Num rows: 3 Data size: 261 Basic stats: COMPLETE Column stats: COMPLETE
-                       TopN Hash Memory Usage: 0.1
                        value expressions: _col0 (type: string)
            Execution mode: llap
            LLAP IO: no inputs
-       Map 4
+       Map 5
            Map Operator Tree:
                TableScan
                  alias: src
@@ -909,11 +965,10 @@ STAGE PLANS:
                      Reduce Output Operator
                        sort order:
                        Statistics: Num rows: 3 Data size: 261 Basic stats: COMPLETE Column stats: COMPLETE
-                       TopN Hash Memory Usage: 0.1
                        value expressions: _col0 (type: string)
            Execution mode: llap
            LLAP IO: no inputs
-       Map 6
+       Map 7
            Map Operator Tree:
                TableScan
                  alias: src
@@ -931,7 +986,6 @@ STAGE PLANS:
                      Reduce Output Operator
                        sort order:
                        Statistics: Num rows: 3 Data size: 261 Basic stats: COMPLETE Column stats: COMPLETE
-                       TopN Hash Memory Usage: 0.1
                        value expressions: _col0 (type: string)
            Execution mode: llap
            LLAP IO: no inputs
@@ -957,7 +1011,35 @@ STAGE PLANS:
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                      name: default.union_top
-       Reducer 5
+               Select Operator
+                 expressions: _col0 (type: string), _col1 (type: int)
+                 outputColumnNames: key, value
+                 Statistics: Num rows: 9 Data size: 819 Basic stats: COMPLETE Column stats: COMPLETE
+                 Group By Operator
+                   aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                   mode: hash
+                   outputColumnNames: _col0, _col1
+                   Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE
+                   Reduce Output Operator
+                     sort order:
+                     Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE
+                     value expressions: _col0 (type: struct), _col1 (type: struct)
+       Reducer 4
+           Execution mode: llap
+           Reduce Operator Tree:
+             Group By Operator
+               aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+               mode: mergepartial
+               outputColumnNames: _col0, _col1
+               Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+               File Output Operator
+                 compressed: false
+                 Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+                 table:
+                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+       Reducer 6
            Execution mode: llap
            Reduce Operator Tree:
              Select Operator
@@ -979,7 +1061,20 @@ STAGE PLANS:
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                      name: default.union_top
-       Reducer 7
+               Select Operator
+                 expressions: _col0 (type: string), _col1 (type: int)
+                 outputColumnNames: key, value
+                 Statistics: Num rows: 9 Data size: 819 Basic stats: COMPLETE Column stats: COMPLETE
+                 Group By Operator
+                   aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                   mode: hash
+                   outputColumnNames: _col0, _col1
+                   Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE
+                   Reduce Output Operator
+                     sort order:
+                     Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE
+                     value expressions: _col0 (type: struct), _col1 (type: struct)
+       Reducer 8
            Execution mode: llap
            Reduce Operator Tree:
              Select Operator
@@ -1001,6 +1096,19 @@ STAGE PLANS:
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                      name: default.union_top
+               Select Operator
+                 expressions: _col0 (type: string), _col1 (type: int)
+                 outputColumnNames: key, value
+                 Statistics: Num rows: 9 Data size: 819 Basic stats: COMPLETE Column stats: COMPLETE
+                 Group By Operator
+                   aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                   mode: hash
+                   outputColumnNames: _col0, _col1
+                   Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE
+                   Reduce Output Operator
+                     sort order:
+                     Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE
+                     value expressions: _col0 (type: struct), _col1 (type: struct)
        Union 3
            Vertex: Union 3
@@ -1020,6 +1128,10 @@ STAGE PLANS:
  Stage: Stage-3
    Stats Work
      Basic Stats Work:
+     Column Stats Desc:
+         Columns: key, value
+         Column Types: string, int
+         Table: default.union_top
PREHOOK: query: insert overwrite table union_top
select * from (select key, 0 as value from src where key % 3 == 0 limit 3)a
diff --git ql/src/test/results/clientpositive/llap/vector_adaptor_usage_mode.q.out ql/src/test/results/clientpositive/llap/vector_adaptor_usage_mode.q.out
index ce1f9af964..ae58464a90 100644
--- ql/src/test/results/clientpositive/llap/vector_adaptor_usage_mode.q.out
+++ ql/src/test/results/clientpositive/llap/vector_adaptor_usage_mode.q.out
@@ -125,17 +125,17 @@ STAGE PLANS:
          Map Operator Tree:
              TableScan
                alias: varchar_udf_1
-               Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
                  expressions: c2 regexp 'val' (type: boolean), c4 regexp 'val' (type: boolean), (c2 regexp 'val' = c4 regexp 'val') (type: boolean)
                  outputColumnNames: _col0, _col1, _col2
-                 Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                  Limit
                    Number of rows: 1
-                   Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                    File Output Operator
                      compressed: false
-                     Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                      table:
                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -203,17 +203,17 @@ STAGE PLANS:
          Map Operator Tree:
              TableScan
                alias: varchar_udf_1
-               Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
                  expressions: regexp_extract(c2, 'val_([0-9]+)', 1) (type: string), regexp_extract(c4, 'val_([0-9]+)', 1) (type: string), (regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1)) (type: boolean)
                  outputColumnNames: _col0, _col1, _col2
-                 Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
                  Limit
                    Number of rows: 1
-                   Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
                    File Output Operator
                      compressed: false
-                     Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
                      table:
                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -281,17 +281,17 @@ STAGE PLANS:
          Map Operator Tree:
              TableScan
                alias: varchar_udf_1
-               Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
                  expressions: regexp_replace(c2, 'val', 'replaced') (type: string), regexp_replace(c4, 'val', 'replaced') (type: string), (regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced')) (type: boolean)
                  outputColumnNames: _col0, _col1, _col2
-                 Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
                  Limit
                    Number of rows: 1
-                   Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
                    File Output Operator
                      compressed: false
-                     Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
                      table:
                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -359,17 +359,17 @@ STAGE PLANS:
          Map Operator Tree:
              TableScan
                alias: varchar_udf_1
-               Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
                  expressions: c2 regexp 'val' (type: boolean), c4 regexp 'val' (type: boolean), (c2 regexp 'val' = c4 regexp 'val') (type: boolean)
                  outputColumnNames: _col0, _col1, _col2
-                 Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                  Limit
                    Number of rows: 1
-                   Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                    File Output Operator
                      compressed: false
-                     Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                      table:
                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -437,7 +437,7 @@ STAGE PLANS:
          Map Operator Tree:
              TableScan
                alias: varchar_udf_1
-               Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
                TableScan Vectorization:
                    native: true
                Select Operator
@@ -448,19 +448,19 @@ STAGE PLANS:
                      native: true
                      projectedOutputColumnNums: [5, 6, 9]
                      selectExpressions: VectorUDFAdaptor(regexp_extract(c2, 'val_([0-9]+)', 1)) -> 5:string, VectorUDFAdaptor(regexp_extract(c4, 'val_([0-9]+)', 1)) -> 6:string, StringGroupColEqualStringGroupColumn(col 7:string, col 8:string)(children: VectorUDFAdaptor(regexp_extract(c2, 'val_([0-9]+)', 1)) -> 7:string, VectorUDFAdaptor(regexp_extract(c4, 'val_([0-9]+)', 1)) -> 8:string) -> 9:boolean
-                 Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
                  Limit
                    Number of rows: 1
                    Limit Vectorization:
                        className: VectorLimitOperator
                        native: true
-                   Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
                    File Output Operator
                      compressed: false
                      File Sink Vectorization:
                          className: VectorFileSinkOperator
                          native: false
-                     Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
                      table:
                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -531,7 +531,7 @@ STAGE PLANS:
          Map Operator Tree:
              TableScan
                alias: varchar_udf_1
-               Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
                TableScan Vectorization:
                    native: true
                Select Operator
@@ -542,19 +542,19 @@ STAGE PLANS:
                      native: true
                      projectedOutputColumnNums: [5, 6, 9]
                      selectExpressions: VectorUDFAdaptor(regexp_replace(c2, 'val', 'replaced')) -> 5:string, VectorUDFAdaptor(regexp_replace(c4, 'val', 'replaced')) -> 6:string, StringGroupColEqualStringGroupColumn(col 7:string, col 8:string)(children: VectorUDFAdaptor(regexp_replace(c2, 'val', 'replaced')) -> 7:string, VectorUDFAdaptor(regexp_replace(c4, 'val', 'replaced')) -> 8:string) -> 9:boolean
-                 Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
                  Limit
                    Number of rows: 1
                    Limit Vectorization:
                        className: VectorLimitOperator
                        native: true
-                   Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
                    File Output Operator
                      compressed: false
                      File Sink Vectorization:
                          className: VectorFileSinkOperator
                          native: false
-                     Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
                      table:
                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -615,14 +615,14 @@ STAGE PLANS:
          Map Operator Tree:
              TableScan
                alias: decimal_udf
-               Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
                  expressions: power(key, 2) (type: double)
                  outputColumnNames: _col0
-                 Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE
                  File Output Operator
                    compressed: false
-                   Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE
                    table:
                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -719,17 +719,17 @@ STAGE PLANS:
          Map Operator Tree:
              TableScan
                alias: decimal_udf
-               Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE
                Filter Operator
                  predicate: (key = 10) (type: boolean)
-                 Statistics: Num rows: 5 Data size: 580 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: 22026.465794806718 (type: double), 2.302585092994046 (type: double), 2.302585092994046 (type: double), 1.0 (type: double), log(10, value) (type: double), log(value, 10) (type: double), 1.0 (type: double), 3.1622776601683795 (type: double)
                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
-                   Statistics: Num rows: 5 Data size: 580 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
                    File Output Operator
                      compressed: false
-                     Statistics: Num rows: 5 Data size: 580 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
                      table:
                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -787,14 +787,14 @@ STAGE PLANS:
          Map Operator Tree:
              TableScan
                alias: decimal_udf
-               Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
                  expressions: power(key, 2) (type: double)
                  outputColumnNames: _col0
-                 Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE
                  File Output Operator
                    compressed: false
-                   Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE
                    table:
                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -891,17 +891,17 @@ STAGE PLANS:
          Map Operator Tree:
              TableScan
                alias: decimal_udf
-               Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE
                Filter Operator
                  predicate: (key = 10) (type: boolean)
-                 Statistics: Num rows: 5 Data size: 580 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: 22026.465794806718 (type: double), 2.302585092994046 (type: double), 2.302585092994046 (type: double), 1.0 (type: double), log(10, value) (type: double), log(value, 10) (type: double), 1.0 (type: double), 3.1622776601683795 (type: double)
(type: double), log(10, value) (type: double), log(value, 10) (type: double), 1.0 (type: double), 3.1622776601683795 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 5 Data size: 580 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 580 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -964,7 +964,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: count_case_groupby - Statistics: Num rows: 5 Data size: 940 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -975,7 +975,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0, 6] selectExpressions: IfExprLongScalarLongColumn(col 1:boolean, val 1, col 5:int)(children: IfExprColumnNull(col 3:boolean, col 4:int, null)(children: NotCol(col 1:boolean) -> 3:boolean, ConstantVectorExpression(val 0) -> 4:int) -> 5:int) -> 6:int - Statistics: Num rows: 5 Data size: 940 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col1) Group By Vectorization: @@ -989,7 +989,7 @@ STAGE PLANS: keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 940 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + @@ -998,7 +998,7 @@ STAGE PLANS: className: VectorReduceSinkStringOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 5 Data size: 940 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs @@ -1033,13 +1033,13 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1090,7 +1090,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: count_case_groupby - Statistics: Num rows: 5 Data size: 940 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select 
@@ -1101,7 +1101,7 @@ STAGE PLANS:
                    native: true
                    projectedOutputColumnNums: [0, 6]
                    selectExpressions: IfExprLongScalarLongColumn(col 1:boolean, val 1, col 5:int)(children: IfExprColumnNull(col 3:boolean, col 4:int, null)(children: NotCol(col 1:boolean) -> 3:boolean, ConstantVectorExpression(val 0) -> 4:int) -> 5:int) -> 6:int
-               Statistics: Num rows: 5 Data size: 940 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: COMPLETE
                Group By Operator
                  aggregations: count(_col1)
                  Group By Vectorization:
@@ -1115,7 +1115,7 @@ STAGE PLANS:
                  keys: _col0 (type: string)
                  mode: hash
                  outputColumnNames: _col0, _col1
-                 Statistics: Num rows: 5 Data size: 940 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    key expressions: _col0 (type: string)
                    sort order: +
@@ -1124,7 +1124,7 @@ STAGE PLANS:
                        className: VectorReduceSinkStringOperator
                        native: true
                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                   Statistics: Num rows: 5 Data size: 940 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE
                    value expressions: _col1 (type: bigint)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
@@ -1159,13 +1159,13 @@ STAGE PLANS:
                keys: KEY._col0 (type: string)
                mode: mergepartial
                outputColumnNames: _col0, _col1
-               Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
                  File Sink Vectorization:
                      className: VectorFileSinkOperator
                      native: false
-                 Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/llap/vector_aggregate_without_gby.q.out ql/src/test/results/clientpositive/llap/vector_aggregate_without_gby.q.out
index e0c164a9d0..300e07bc39 100644
--- ql/src/test/results/clientpositive/llap/vector_aggregate_without_gby.q.out
+++ ql/src/test/results/clientpositive/llap/vector_aggregate_without_gby.q.out
@@ -57,7 +57,7 @@ STAGE PLANS:
        Map Operator Tree:
            TableScan
              alias: testvec
-             Statistics: Num rows: 7 Data size: 1344 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 7 Data size: 714 Basic stats: COMPLETE Column stats: COMPLETE
              TableScan Vectorization:
                  native: true
                  vectorizationSchemaColumns: [0:id:int, 1:dt:int, 2:greg_dt:string, 3:ROW__ID:struct]
@@ -67,7 +67,7 @@ STAGE PLANS:
                    native: true
                    predicateExpression: FilterLongColEqualLongScalar(col 0:int, val 5)
                predicate: (id = 5) (type: boolean)
-               Statistics: Num rows: 7 Data size: 1344 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
                  expressions: dt (type: int), greg_dt (type: string)
                  outputColumnNames: dt, greg_dt
@@ -75,7 +75,7 @@ STAGE PLANS:
                      className: VectorSelectOperator
                      native: true
                      projectedOutputColumnNums: [1, 2]
-                 Statistics: Num rows: 7 Data size: 1344 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
                  Group By Operator
                    aggregations: max(dt), max(greg_dt)
                    Group By Vectorization:
@@ -87,7 +87,7 @@ STAGE PLANS:
                        projectedOutputColumnNums: [0, 1]
                    mode: hash
                    outputColumnNames: _col0, _col1
-                   Statistics: Num rows: 1 Data size: 380 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE
                    Reduce Output Operator
                      sort order:
                      Reduce Sink Vectorization:
@@ -96,7 +96,7 @@ STAGE PLANS:
                          native: true
                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                          valueColumnNums: [0, 1]
-                     Statistics: Num rows: 1 Data size: 380 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE
                      value expressions: _col0 (type: int), _col1 (type: string)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
@@ -142,13 +142,13 @@ STAGE PLANS:
                    projectedOutputColumnNums: [0, 1]
                mode: mergepartial
                outputColumnNames: _col0, _col1
-               Statistics: Num rows: 1 Data size: 380 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
                  File Sink Vectorization:
                      className: VectorFileSinkOperator
                      native: false
-                 Statistics: Num rows: 1 Data size: 380 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out
index c0e4f6712a..aabfc732a9 100644
--- ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out
+++ ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out
@@ -127,7 +127,7 @@ STAGE PLANS:
        Map Operator Tree:
            TableScan
              alias: t1
-             Statistics: Num rows: 100 Data size: 49536 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 100 Data size: 34084 Basic stats: COMPLETE Column stats: COMPLETE
              TableScan Vectorization:
                  native: true
              Filter Operator
@@ -136,7 +136,7 @@ STAGE PLANS:
                    native: true
                    predicateExpression: SelectColumnIsNotNull(col 10:binary)
                predicate: bin is not null (type: boolean)
-               Statistics: Num rows: 95 Data size: 47059 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 100 Data size: 34084 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
                  expressions: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), f (type: float), d (type: double), bo (type: boolean), s (type: string), ts (type: timestamp), dec (type: decimal(4,2)), bin (type: binary)
                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
@@ -144,7 +144,7 @@ STAGE PLANS:
                      className: VectorSelectOperator
                      native: true
                      projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
-                 Statistics: Num rows: 95 Data size: 47059 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 100 Data size: 34084 Basic stats: COMPLETE Column stats: COMPLETE
                  Map Join Operator
                    condition map:
                         Inner Join 0 to 1
@@ -158,7 +158,7 @@ STAGE PLANS:
                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21
                    input vertices:
                      1 Map 4
-                   Statistics: Num rows: 104 Data size: 51764 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 10000 Data size: 6819968 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: hash(_col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21) (type: int)
                      outputColumnNames: _col0
@@ -167,7 +167,7 @@ STAGE PLANS:
                          native: true
                          projectedOutputColumnNums: [22]
                          selectExpressions: VectorUDFAdaptor(hash(_col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21)) -> 22:int
-                     Statistics: Num rows: 104 Data size: 51764 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 10000 Data size: 6819968 Basic stats: COMPLETE Column stats: COMPLETE
                      Group By Operator
                        aggregations: sum(_col0)
                        Group By Vectorization:
@@ -179,14 +179,14 @@ STAGE PLANS:
                            projectedOutputColumnNums: [0]
                        mode: hash
                        outputColumnNames: _col0
-                       Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                        Reduce Output Operator
                          sort order:
                          Reduce Sink Vectorization:
                              className: VectorReduceSinkEmptyKeyOperator
                              native: true
                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                         Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                         Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                          value expressions: _col0 (type: bigint)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
@@ -203,7 +203,7 @@ STAGE PLANS:
        Map Operator Tree:
            TableScan
              alias: t2
-             Statistics: Num rows: 100 Data size: 49536 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 100 Data size: 34084 Basic stats: COMPLETE Column stats: COMPLETE
              TableScan Vectorization:
                  native: true
              Filter Operator
@@ -212,7 +212,7 @@ STAGE PLANS:
                    native: true
                    predicateExpression: SelectColumnIsNotNull(col 10:binary)
                predicate: bin is not null (type: boolean)
-               Statistics: Num rows: 95 Data size: 47059 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 100 Data size: 34084 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
                  expressions: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), f (type: float), d (type: double), bo (type: boolean), s (type: string), ts (type: timestamp), dec (type: decimal(4,2)), bin (type: binary)
                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
@@ -220,7 +220,7 @@ STAGE PLANS:
                      className: VectorSelectOperator
                      native: true
                      projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
-                 Statistics: Num rows: 95 Data size: 47059 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 100 Data size: 34084 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    key expressions: _col10 (type: binary)
                    sort order: +
@@ -229,7 +229,7 @@ STAGE PLANS:
                        className: VectorReduceSinkStringOperator
                        native: true
                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                   Statistics: Num rows: 95 Data size: 47059 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 100 Data size: 34084 Basic stats: COMPLETE Column stats: COMPLETE
                    value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: boolean), _col7 (type: string), _col8 (type: timestamp), _col9 (type: decimal(4,2))
            Execution mode: vectorized, llap
            LLAP IO: all inputs
@@ -262,7 +262,7 @@ STAGE PLANS:
                    projectedOutputColumnNums: [0]
                mode: mergepartial
                outputColumnNames: _col0
-               Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                Reduce Output Operator
                  key expressions: _col0 (type: bigint)
                  sort order: +
@@ -270,7 +270,7 @@ STAGE PLANS:
                      className: VectorReduceSinkObjectHashOperator
                      native: true
                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
        Reducer 3
            Execution mode: vectorized, llap
            Reduce Vectorization:
@@ -287,13 +287,13 @@ STAGE PLANS:
                    className: VectorSelectOperator
                    native: true
                    projectedOutputColumnNums: [0]
-               Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
                  File Sink Vectorization:
                      className: VectorFileSinkOperator
                      native: false
-                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -351,7 +351,7 @@ STAGE PLANS:
        Map Operator Tree:
            TableScan
              alias: hundredorc
-             Statistics: Num rows: 100 Data size: 13824 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 100 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE
              TableScan Vectorization:
                  native: true
              Select Operator
@@ -361,7 +361,7 @@ STAGE PLANS:
                    className: VectorSelectOperator
                    native: true
                    projectedOutputColumnNums: [10]
-               Statistics: Num rows: 100 Data size: 13824 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 100 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE
                Group By Operator
                  aggregations: count()
                  Group By Vectorization:
@@ -375,7 +375,7 @@ STAGE PLANS:
                  keys: bin (type: binary)
                  mode: hash
                  outputColumnNames: _col0, _col1
-                 Statistics: Num rows: 100 Data size: 13824 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 50 Data size: 3200 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    key expressions: _col0 (type: binary)
                    sort order: +
@@ -384,7 +384,7 @@ STAGE PLANS:
                        className: VectorReduceSinkStringOperator
                        native: true
                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                   Statistics: Num rows: 100 Data size: 13824 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 50 Data size: 3200 Basic stats: COMPLETE Column stats: COMPLETE
                    value expressions: _col1 (type: bigint)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
@@ -419,7 +419,7 @@ STAGE PLANS:
                keys: KEY._col0 (type: binary)
                mode: mergepartial
                outputColumnNames: _col0, _col1
-               Statistics: Num rows: 50 Data size: 6912 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 25 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
                  expressions: _col1 (type: bigint), _col0 (type: binary)
                  outputColumnNames: _col0, _col1
@@ -427,7 +427,7 @@ STAGE PLANS:
                      className: VectorSelectOperator
                      native: true
                      projectedOutputColumnNums: [1, 0]
-                 Statistics: Num rows: 50 Data size: 6912 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 25 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    key expressions: _col1 (type: binary)
                    sort order: +
@@ -435,7 +435,7 @@ STAGE PLANS:
                        className: VectorReduceSinkObjectHashOperator
                        native: true
                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                   Statistics: Num rows: 50 Data size: 6912 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 25 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE
                    value expressions: _col0 (type: bigint)
        Reducer 3
            Execution mode: vectorized, llap
@@ -453,13 +453,13 @@ STAGE PLANS:
                    className: VectorSelectOperator
                    native: true
                    projectedOutputColumnNums: [1, 0]
-               Statistics: Num rows: 50 Data size: 6912 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 25 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
                  File Sink Vectorization:
                      className: VectorFileSinkOperator
                      native: false
-                 Statistics: Num rows: 50 Data size: 6912 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 25 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -539,7 +539,7 @@ STAGE PLANS:
        Map Operator Tree:
            TableScan
              alias: t1
-             Statistics: Num rows: 100 Data size: 14208 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 100 Data size: 6000 Basic stats: COMPLETE Column stats: COMPLETE
              TableScan Vectorization:
                  native: true
              Filter Operator
@@ -548,7 +548,7 @@ STAGE PLANS:
                    native: true
                    predicateExpression: SelectColumnIsNotNull(col 2:int)
                predicate: i is not null (type: boolean)
-               Statistics: Num rows: 95 Data size: 13497 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 100 Data size: 6000 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
                  expressions: i (type: int), bin (type: binary)
                  outputColumnNames: _col0, _col1
@@ -556,7 +556,7 @@ STAGE PLANS:
                      className: VectorSelectOperator
                      native: true
                      projectedOutputColumnNums: [2, 10]
-                 Statistics: Num rows: 95 Data size: 13497 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 100 Data size: 6000 Basic stats: COMPLETE Column stats: COMPLETE
                  Map Join Operator
                    condition map:
                         Inner Join 0 to 1
@@ -570,7 +570,7 @@ STAGE PLANS:
                    outputColumnNames: _col0, _col1, _col3
                    input vertices:
                      1 Map 2
-                   Statistics: Num rows: 104 Data size: 14846 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 125 Data size: 14500 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: _col0 (type: int), _col1 (type: binary), _col3 (type: binary)
                      outputColumnNames: _col0, _col1, _col2
@@ -578,13 +578,13 @@ STAGE PLANS:
                          className: VectorSelectOperator
                          native: true
                          projectedOutputColumnNums: [2, 10, 12]
-                     Statistics: Num rows: 104 Data size: 14846 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 125 Data size: 14500 Basic stats: COMPLETE Column stats: COMPLETE
                      File Output Operator
                        compressed: false
                        File Sink Vectorization:
                            className: VectorFileSinkOperator
                            native: false
-                       Statistics: Num rows: 104 Data size: 14846 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 125 Data size: 14500 Basic stats: COMPLETE Column stats: COMPLETE
                        table:
                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -604,7 +604,7 @@ STAGE PLANS:
        Map Operator Tree:
            TableScan
              alias: t2
-             Statistics: Num rows: 100 Data size: 14208 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 100 Data size: 6000 Basic stats: COMPLETE Column stats: COMPLETE
              TableScan Vectorization:
                  native: true
              Filter Operator
@@ -613,7 +613,7 @@ STAGE PLANS:
                    native: true
                    predicateExpression: SelectColumnIsNotNull(col 2:int)
                predicate: i is not null (type: boolean)
-               Statistics: Num rows: 95 Data size: 13497 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 100 Data size: 6000 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
                  expressions: i (type: int), bin (type: binary)
                  outputColumnNames: _col0, _col1
@@ -621,7 +621,7 @@ STAGE PLANS:
                      className: VectorSelectOperator
                      native: true
                      projectedOutputColumnNums: [2, 10]
-                 Statistics: Num rows: 95 Data size: 13497 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 100 Data size: 6000 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    key expressions: _col0 (type: int)
                    sort order: +
@@ -630,7 +630,7 @@ STAGE PLANS:
                        className: VectorReduceSinkLongOperator
                        native: true
                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                   Statistics: Num rows: 95 Data size: 13497 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 100 Data size: 6000 Basic stats: COMPLETE Column stats: COMPLETE
                    value expressions: _col1 (type: binary)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
diff --git ql/src/test/results/clientpositive/llap/vector_bround.q.out ql/src/test/results/clientpositive/llap/vector_bround.q.out
index ffec163f1b..5ef8695f20 100644
--- ql/src/test/results/clientpositive/llap/vector_bround.q.out
+++ ql/src/test/results/clientpositive/llap/vector_bround.q.out
@@ -55,7 +55,7 @@ STAGE PLANS:
        Map Operator Tree:
            TableScan
              alias: test_vector_bround
-             Statistics: Num rows: 8 Data size: 128 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 8 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE
              TableScan Vectorization:
                  native: true
                  vectorizationSchemaColumns: [0:v0:double, 1:v1:double, 2:ROW__ID:struct]
@@ -67,13 +67,13 @@ STAGE PLANS:
                    native: true
                    projectedOutputColumnNums: [3, 4]
                    selectExpressions: FuncBRoundDoubleToDouble(col 0:double) -> 3:double, BRoundWithNumDigitsDoubleToDouble(col 1, decimalPlaces 1) -> 4:double
-               Statistics: Num rows: 8 Data size: 128 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 8 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
                  File Sink Vectorization:
                      className: VectorFileSinkOperator
                      native: false
-                 Statistics: Num rows: 8 Data size: 128 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 8 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out
index 7c78fe51c7..22b772279b 100644
--- ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out
+++ ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out
@@ -130,7 +130,7 @@ STAGE PLANS:
        Map Operator Tree:
            TableScan
              alias: over1korc
-             Statistics: Num rows: 1049 Data size: 3992 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 1049 Data size: 4196 Basic stats: COMPLETE Column stats: COMPLETE
              TableScan Vectorization:
                  native: true
              Select Operator
@@ -140,7 +140,7 @@ STAGE PLANS:
                    className: VectorSelectOperator
                    native: true
                    projectedOutputColumnNums: [2]
-               Statistics: Num rows: 1049 Data size: 3992 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1049 Data size: 4196 Basic stats: COMPLETE Column stats: COMPLETE
                Group By Operator
                  aggregations: avg(50), avg(50.0), avg(50)
                  Group By Vectorization:
@@ -154,7 +154,7 @@ STAGE PLANS:
                  keys: _col0 (type: int)
                  mode: hash
                  outputColumnNames: _col0, _col1, _col2, _col3
-                 Statistics: Num rows: 1049 Data size: 3992 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 256 Data size: 114688 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    key expressions: _col0 (type: int)
                    sort order: +
@@ -163,7 +163,7 @@ STAGE PLANS:
                        className: VectorReduceSinkLongOperator
                        native: true
                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                   Statistics: Num rows: 1049 Data size: 3992 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 256 Data size: 114688 Basic stats: COMPLETE Column stats: COMPLETE
                    TopN Hash Memory Usage: 0.1
                    value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
            Execution mode: vectorized, llap
@@ -199,7 +199,7 @@ STAGE PLANS:
                keys: KEY._col0 (type: int)
                mode: mergepartial
                outputColumnNames: _col0, _col1, _col2, _col3
-               Statistics: Num rows: 524 Data size: 1994 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 256 Data size: 33792 Basic stats: COMPLETE Column stats: COMPLETE
                Reduce Output Operator
                  key expressions: _col0 (type: int)
                  sort order: +
@@ -207,7 +207,7 @@ STAGE PLANS:
                      className: VectorReduceSinkObjectHashOperator
                      native: true
                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                 Statistics: Num rows: 524 Data size: 1994 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 256 Data size: 33792 Basic stats: COMPLETE Column stats: COMPLETE
                  TopN Hash Memory Usage: 0.1
                  value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: decimal(14,4))
        Reducer 3
@@ -226,19 +226,19 @@ STAGE PLANS:
                    className: VectorSelectOperator
                    native: true
                    projectedOutputColumnNums: [0, 1, 2, 3]
-               Statistics: Num rows: 524 Data size: 1994 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 256 Data size: 33792 Basic stats: COMPLETE Column stats: COMPLETE
                Limit
                  Number of rows: 10
                  Limit Vectorization:
                      className: VectorLimitOperator
                      native: true
-                 Statistics: Num rows: 10 Data size: 30 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 10 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE
                  File Output Operator
                    compressed: false
                    File Sink Vectorization:
                        className: VectorFileSinkOperator
                        native: false
-                   Statistics: Num rows: 10 Data size: 30 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 10 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE
                    table:
                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/llap/vector_char_2.q.out ql/src/test/results/clientpositive/llap/vector_char_2.q.out
index e1dc02d7e9..827ec2e9c7 100644
--- ql/src/test/results/clientpositive/llap/vector_char_2.q.out
+++ ql/src/test/results/clientpositive/llap/vector_char_2.q.out
@@ -80,7 +80,7 @@ STAGE PLANS:
        Map Operator Tree:
            TableScan
              alias: char_2
-             Statistics: Num rows: 500 Data size: 94248 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
              TableScan Vectorization:
                  native: true
              Select Operator
@@ -91,7 +91,7 @@ STAGE PLANS:
                    native: true
                    projectedOutputColumnNums: [1, 3]
                    selectExpressions: CastStringToLong(col 0:char(10)) -> 3:int
-               Statistics: Num rows: 500 Data size: 94248 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
                Group By Operator
                  aggregations: sum(_col1), count()
                  Group By Vectorization:
@@ -105,7 +105,7 @@ STAGE PLANS:
                  keys: _col0 (type: char(20))
                  mode: hash
                  outputColumnNames: _col0, _col1, _col2
-                 Statistics: Num rows: 500 Data size: 94248 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 250 Data size: 26750 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    key expressions: _col0 (type: char(20))
                    sort order: +
@@ -114,7 +114,7 @@ STAGE PLANS:
                        className: VectorReduceSinkStringOperator
                        native: true
                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                   Statistics: Num rows: 500 Data size: 94248 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 250 Data size: 26750 Basic stats: COMPLETE Column stats: COMPLETE
                    TopN Hash Memory Usage: 0.1
                    value expressions: _col1 (type: bigint), _col2 (type: bigint)
            Execution mode: vectorized, llap
@@ -150,7 +150,7 @@ STAGE PLANS:
                keys: KEY._col0 (type: char(20))
                mode: mergepartial
                outputColumnNames: _col0, _col1, _col2
-               Statistics: Num rows: 250 Data size: 47124 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 250 Data size: 26750 Basic stats: COMPLETE Column stats: COMPLETE
                Reduce Output Operator
                  key expressions: _col0 (type: char(20))
                  sort order: +
@@ -158,7 +158,7 @@ STAGE PLANS:
                      className: VectorReduceSinkObjectHashOperator
                      native: true
                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                 Statistics: Num rows: 250 Data size: 47124 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 250 Data size: 26750 Basic stats: COMPLETE Column stats: COMPLETE
                  TopN Hash Memory Usage: 0.1
                  value expressions: _col1 (type: bigint), _col2 (type: bigint)
        Reducer 3
@@ -177,19 +177,19 @@ STAGE PLANS:
                    className: VectorSelectOperator
                    native: true
                    projectedOutputColumnNums: [0, 1, 2]
-               Statistics: Num rows: 250 Data size: 47124 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 250 Data size: 26750 Basic stats: COMPLETE Column stats: COMPLETE
                Limit
                  Number of rows: 5
                  Limit Vectorization:
                      className: VectorLimitOperator
                      native: true
-                 Statistics: Num rows: 5 Data size: 940 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 5 Data size: 535 Basic stats: COMPLETE Column stats: COMPLETE
                  File Output Operator
                    compressed: false
                    File Sink Vectorization:
                        className: VectorFileSinkOperator
                        native: false
-                   Statistics: Num rows: 5 Data size: 940 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 5 Data size: 535 Basic stats: COMPLETE Column stats: COMPLETE
                    table:
                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -276,7 +276,7 @@ STAGE PLANS:
        Map Operator Tree:
            TableScan
              alias: char_2
-             Statistics: Num rows: 500 Data size: 94248 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
              TableScan Vectorization:
                  native: true
              Select Operator
@@ -287,7 +287,7 @@ STAGE PLANS:
                    native: true
                    projectedOutputColumnNums: [1, 3]
                    selectExpressions: CastStringToLong(col 0:char(10)) -> 3:int
-               Statistics: Num rows: 500 Data size: 94248 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
                Group By Operator
                  aggregations: sum(_col1), count()
                  Group By Vectorization:
@@ -301,7 +301,7 @@ STAGE PLANS:
                  keys: _col0 (type: char(20))
                  mode: hash
                  outputColumnNames: _col0, _col1, _col2
-                 Statistics: Num rows: 500 Data size: 94248 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 250 Data size: 26750 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    key expressions: _col0 (type: char(20))
                    sort order: -
@@ -310,7 +310,7 @@ STAGE PLANS:
                        className: VectorReduceSinkStringOperator
                        native: true
                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                   Statistics: Num rows: 500 Data size: 94248 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 250 Data size: 26750 Basic stats: COMPLETE Column stats: COMPLETE
                    TopN Hash Memory Usage: 0.1
                    value expressions: _col1 (type: bigint), _col2 (type: bigint)
            Execution mode: vectorized, llap
@@ -346,7 +346,7 @@ STAGE PLANS:
                keys: KEY._col0 (type: char(20))
                mode: mergepartial
                outputColumnNames: _col0, _col1, _col2
-               Statistics: Num rows: 250 Data size: 47124 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 250 Data size: 26750 Basic stats: COMPLETE Column stats: COMPLETE
                Reduce Output Operator
                  key expressions: _col0 (type: char(20))
                  sort order: -
@@ -354,7 +354,7 @@ STAGE PLANS:
                      className: VectorReduceSinkObjectHashOperator
                      native: true
                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                 Statistics: Num rows: 250 Data size: 47124 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 250 Data size: 26750 Basic stats: COMPLETE Column stats: COMPLETE
                  TopN Hash Memory Usage: 0.1
                  value expressions: _col1 (type: bigint), _col2 (type: bigint)
        Reducer 3
@@ -373,19 +373,19 @@ STAGE PLANS:
                    className: VectorSelectOperator
                    native: true
                    projectedOutputColumnNums: [0, 1, 2]
-               Statistics: Num rows: 250 Data size: 47124 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 250 Data size: 26750 Basic stats: COMPLETE Column stats: COMPLETE
                Limit
                  Number of rows: 5
                  Limit Vectorization:
                      className: VectorLimitOperator
                      native: true
-                 Statistics: Num rows: 5 Data size: 940 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 5 Data size: 535 Basic stats: COMPLETE Column stats: COMPLETE
                  File Output Operator
                    compressed: false
                    File Sink Vectorization:
                        className: VectorFileSinkOperator
                        native: false
-                   Statistics: Num rows: 5 Data size: 940 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 5 Data size: 535 Basic stats: COMPLETE Column stats: COMPLETE
                    table:
                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/llap/vector_coalesce_2.q.out ql/src/test/results/clientpositive/llap/vector_coalesce_2.q.out
index 2a95ed089e..0f16d60d3b 100644
--- ql/src/test/results/clientpositive/llap/vector_coalesce_2.q.out
+++ ql/src/test/results/clientpositive/llap/vector_coalesce_2.q.out
@@ -46,22 +46,22 @@ STAGE PLANS:
        Map Operator Tree:
            TableScan
              alias: str_str_orc
-             Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 4 Data size: 595 Basic stats: COMPLETE Column stats: COMPLETE
              Select Operator
                expressions: str2 (type: string), UDFToInteger(COALESCE(str1,0)) (type: int)
                outputColumnNames: _col0, _col1
-               Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 4 Data size: 595 Basic stats: COMPLETE Column stats: COMPLETE
                Group By Operator
                  aggregations: sum(_col1)
                  keys: _col0 (type: string)
                  mode: hash
                  outputColumnNames: _col0, _col1
-                 Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    key expressions: _col0 (type: string)
                    sort order: +
                    Map-reduce partition columns: _col0 (type: string)
-                   Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE
                    value expressions: _col1 (type: bigint)
            Execution mode: llap
            LLAP IO: all inputs
@@ -73,14 +73,14 @@ STAGE PLANS:
                keys: KEY._col0 (type: string)
                mode: mergepartial
                outputColumnNames: _col0, _col1
-               Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
                  expressions: _col0 (type: string), round((UDFToDouble(_col1) / 60.0), 2) (type: double)
                  outputColumnNames: _col0, _col1
-                 Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE
                  File Output Operator
                    compressed: false
-                   Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE
                    table:
                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -133,14 +133,14 @@ STAGE PLANS:
        Map Operator Tree:
            TableScan
              alias: str_str_orc
-             Statistics: Num rows: 4 Data size: 736 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 4 Data size: 255 Basic stats: COMPLETE Column stats: COMPLETE
              Select Operator
                expressions: COALESCE(str1,0) (type: string)
                outputColumnNames: _col0
-               Statistics: Num rows: 4 Data size: 736 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 4 Data size: 736 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
-                 Statistics: Num rows: 4 Data size: 736 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 4 Data size: 736 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -200,7 +200,7 @@ STAGE PLANS:
        Map Operator Tree:
            TableScan
              alias: str_str_orc
-             Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 4 Data size: 595 Basic stats: COMPLETE Column stats: COMPLETE
              TableScan Vectorization:
                  native: true
              Select Operator
@@ -211,7 +211,7 @@ STAGE PLANS:
                    native: true
                    projectedOutputColumnNums: [1, 5]
                    selectExpressions: CastStringToLong(col 4:string)(children: VectorCoalesce(columns [0, 3])(children: col 0:string, ConstantVectorExpression(val 0) -> 3:string) -> 4:string) -> 5:int
-               Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 4 Data size: 595 Basic stats: COMPLETE Column stats: COMPLETE
                Group By Operator
                  aggregations: sum(_col1)
                  Group By Vectorization:
@@ -225,7 +225,7 @@ STAGE PLANS:
                  keys: _col0 (type: string)
                  mode: hash
                  outputColumnNames: _col0, _col1
-                 Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    key expressions: _col0 (type: string)
                    sort order: +
@@ -234,7 +234,7 @@ STAGE PLANS:
                        className: VectorReduceSinkStringOperator
                        native: true
                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                   Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE
                    value expressions: _col1 (type: bigint)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
@@ -269,7 +269,7 @@ STAGE PLANS:
                keys: KEY._col0 (type: string)
                mode: mergepartial
                outputColumnNames: _col0, _col1
-               Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
                  expressions: _col0 (type: string), round((UDFToDouble(_col1) / 60.0), 2) (type: double)
                  outputColumnNames: _col0, _col1
@@ -278,13 +278,13 @@ STAGE PLANS:
                      native: true
                      projectedOutputColumnNums: [0, 2]
                      selectExpressions: RoundWithNumDigitsDoubleToDouble(col 3, decimalPlaces 2)(children: DoubleColDivideDoubleScalar(col 2:double, val 60.0)(children: CastLongToDouble(col 1:bigint) -> 2:double) -> 3:double) -> 2:double
-                 Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE
                  File Output Operator
                    compressed: false
                    File Sink Vectorization:
                        className: VectorFileSinkOperator
                        native: false
-                   Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE
                    table:
                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -337,7 +337,7 @@ STAGE PLANS:
        Map Operator Tree:
            TableScan
              alias: str_str_orc
-             Statistics: Num rows: 4 Data size: 736 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 4 Data size: 255 Basic stats: COMPLETE Column stats: COMPLETE
              TableScan Vectorization:
                  native: true
              Select Operator
@@ -348,13 +348,13 @@ STAGE PLANS:
                    native: true
                    projectedOutputColumnNums: [4]
                    selectExpressions: VectorCoalesce(columns [0, 3])(children: col 0:string, ConstantVectorExpression(val 0) -> 3:string) -> 4:string
-               Statistics: Num rows: 4 Data size: 736 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 4 Data size: 736 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
                  File Sink Vectorization:
                      className: VectorFileSinkOperator
                      native: false
-                 Statistics: Num rows: 4 Data size: 736 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 4 Data size: 736 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/llap/vector_coalesce_3.q.out ql/src/test/results/clientpositive/llap/vector_coalesce_3.q.out
index 85ddc7cc8d..de42651b67 100644
--- ql/src/test/results/clientpositive/llap/vector_coalesce_3.q.out
+++ ql/src/test/results/clientpositive/llap/vector_coalesce_3.q.out
@@ -53,11 +53,11 @@ STAGE PLANS:
        Map Operator Tree:
            TableScan
              alias: m
-             Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
              Select Operator
                expressions: member (type: bigint)
                outputColumnNames: _col0
-               Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
                Map Join Operator
                  condition map:
                       Left Outer Join 0 to 1
@@ -67,14 +67,14 @@ STAGE PLANS:
                  outputColumnNames: _col0, _col2
                  input vertices:
                    1 Map 2
-                 Statistics: Num rows: 4 Data size: 35 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 4 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: _col0 (type: bigint), CASE WHEN ((COALESCE(_col2,5) > 1)) THEN (_col2) ELSE (null) END (type: bigint)
                    outputColumnNames: _col0, _col1
-                   Statistics: Num rows: 4 Data size: 35 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 4 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
                    File Output Operator
                      compressed: false
-                     Statistics: Num rows: 4 Data size: 35 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 4 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
                      table:
                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -85,16 +85,16 @@ STAGE PLANS:
        Map Operator Tree:
            TableScan
              alias: n
-             Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
              Select Operator
                expressions: member (type: bigint), attr (type: bigint)
                outputColumnNames: _col0, _col1
-               Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
                Reduce Output Operator
                  key expressions: _col0 (type: bigint)
                  sort order: +
                  Map-reduce partition columns: _col0 (type: bigint)
-                 Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
                  value expressions: _col1 (type: bigint)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
diff --git ql/src/test/results/clientpositive/llap/vector_complex_join.q.out ql/src/test/results/clientpositive/llap/vector_complex_join.q.out
index ee9e40ab5e..a5bd7f6fd8 100644
--- ql/src/test/results/clientpositive/llap/vector_complex_join.q.out
+++ ql/src/test/results/clientpositive/llap/vector_complex_join.q.out
@@ -220,7 +220,7 @@ STAGE PLANS:
        Map Operator Tree:
            TableScan
              alias: test2b
-             Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
              TableScan Vectorization:
                  native: true
              Filter Operator
@@ -229,7 +229,7 @@ STAGE PLANS:
                    native: true
                    predicateExpression: SelectColumnIsNotNull(col 0:int)
                predicate: a is not null (type: boolean)
-               Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                Map Join Operator
                  condition map:
                       Inner Join 0 to 1
diff --git ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out
index d7a393d48e..90086ea529 100644
--- ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out
+++ ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out
@@ -1252,7 +1252,7 @@ STAGE PLANS:
        Map Operator Tree:
            TableScan
              alias: web_sales
-             Statistics: Num rows: 2000 Data size: 3511604 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 2000 Data size: 8000 Basic stats: COMPLETE Column stats: COMPLETE
              TableScan Vectorization:
                  native: true
              Select Operator
@@ -1262,7 +1262,7 @@ STAGE PLANS:
                    className: VectorSelectOperator
                    native: true
                    projectedOutputColumnNums: [16]
-               Statistics: Num rows: 2000 Data size: 3511604 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 2000 Data size: 8000 Basic stats: COMPLETE Column stats: COMPLETE
                Group By Operator
                  Group By Vectorization:
                      className: VectorGroupByOperator
@@ -1274,7 +1274,7 @@ STAGE PLANS:
                  keys: ws_order_number (type: int)
                  mode: hash
                  outputColumnNames: _col0
-                 Statistics: Num rows: 2000 Data size: 3511604 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 169 Data size: 676 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    key expressions: _col0 (type: int)
                    sort order: +
@@ -1283,7 +1283,7 @@ STAGE PLANS:
                        className: VectorReduceSinkLongOperator
                        native: true
                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                   Statistics: Num rows: 2000 Data size: 3511604 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 169 Data size: 676 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
            Map Vectorization:
@@ -1315,7 +1315,7 @@ STAGE PLANS:
                keys: KEY._col0 (type: int)
                mode: mergepartial
                outputColumnNames: _col0
-               Statistics: Num rows: 1000 Data size: 1755802 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 169 Data size: 676 Basic stats: COMPLETE Column stats: COMPLETE
                Group By Operator
                  aggregations: count(_col0)
                  Group By Vectorization:
@@ -1327,14 +1327,14 @@ STAGE PLANS:
                      projectedOutputColumnNums: [0]
                  mode: hash
                  outputColumnNames: _col0
-                 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    sort order:
                    Reduce Sink Vectorization:
                        className: VectorReduceSinkEmptyKeyOperator
                        native: true
                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                   Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                    value expressions: _col0 (type: bigint)
        Reducer 3
            Execution mode: vectorized, llap
@@ -1356,13 +1356,13 @@ STAGE PLANS:
                    projectedOutputColumnNums: [0]
                mode: mergepartial
                outputColumnNames: _col0
-               Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
                  File Sink Vectorization:
                      className: VectorFileSinkOperator
                      native: false
-                 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/llap/vector_data_types.q.out ql/src/test/results/clientpositive/llap/vector_data_types.q.out
index d77eb4350f..06b50bb461 100644
--- ql/src/test/results/clientpositive/llap/vector_data_types.q.out
+++ ql/src/test/results/clientpositive/llap/vector_data_types.q.out
@@ -119,15 +119,15 @@ STAGE PLANS:
        Map Operator Tree:
            TableScan
              alias: over1korc
-             Statistics: Num rows: 1049 Data size: 514968 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 1049 Data size: 357693 Basic stats: COMPLETE Column stats: COMPLETE
              Select Operator
                expressions: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), f (type: float), d (type: double), bo (type: boolean), s (type: string), ts (type: timestamp), dec (type: decimal(4,2)), bin (type: binary)
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
-               Statistics: Num rows: 1049 Data size: 514968 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1049 Data size: 357693 Basic stats: COMPLETE Column stats: COMPLETE
                Reduce Output Operator
                  key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int)
                  sort order: +++
-                 Statistics: Num rows: 1049 Data size: 514968 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1049 Data size: 357693 Basic stats: COMPLETE Column stats: COMPLETE
                  TopN Hash Memory Usage: 0.1
                  value expressions: _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: boolean), _col7 (type: string), _col8 (type: timestamp), _col9 (type: decimal(4,2)), _col10 (type: binary)
            Execution mode: llap
@@ -138,13 +138,13 @@ STAGE PLANS:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey2 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: float), VALUE._col2 (type: double), VALUE._col3 (type: boolean), VALUE._col4 (type: string), VALUE._col5 (type: timestamp), VALUE._col6 (type: decimal(4,2)), VALUE._col7 (type: binary)
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
-               Statistics: Num rows: 1049 Data size: 514968 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1049 Data size: 357693 Basic stats: COMPLETE Column stats: COMPLETE
                Limit
                  Number of rows: 20
-                 Statistics: Num rows: 20 Data size: 9800 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 20 Data size: 6820 Basic stats: COMPLETE Column stats: COMPLETE
                  File Output Operator
                    compressed: false
-                   Statistics: Num rows: 20 Data size: 9800 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 20 Data size: 6820 Basic stats: COMPLETE Column stats: COMPLETE
                    table:
                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -219,7 +219,7 @@ STAGE PLANS:
        Map Operator Tree:
            TableScan
              alias: over1korc
-             Statistics: Num rows: 1049 Data size: 514968 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 1049 Data size: 357693 Basic stats: COMPLETE Column stats: COMPLETE
              TableScan Vectorization:
                  native: true
              Select Operator
@@ -229,7 +229,7 @@ STAGE PLANS:
                    className: VectorSelectOperator
                    native: true
                    projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
-               Statistics: Num rows: 1049 Data size: 514968 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1049 Data size: 357693 Basic stats: COMPLETE Column stats: COMPLETE
                Reduce Output Operator
                  key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int)
                  sort order: +++
@@ -237,7 +237,7 @@ STAGE PLANS:
                      className: VectorReduceSinkObjectHashOperator
                      native: true
                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                 Statistics: Num rows: 1049 Data size: 514968 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1049 Data size: 357693 Basic stats: COMPLETE Column stats: COMPLETE
                  TopN Hash Memory Usage: 0.1
                  value expressions: _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: boolean), _col7 (type: string), _col8 (type: timestamp), _col9 (type: decimal(4,2)), _col10 (type: binary)
            Execution mode: vectorized, llap
@@ -267,19 +267,19 @@ STAGE PLANS:
                    className: VectorSelectOperator
                    native: true
                    projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
-               Statistics: Num rows: 1049 Data size: 514968 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1049 Data size: 357693 Basic stats: COMPLETE Column stats: COMPLETE
                Limit
                  Number of rows: 20
                  Limit Vectorization:
                      className: VectorLimitOperator
                      native: true
-                 Statistics: Num rows: 20 Data size: 9800 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 20 Data size: 6820 Basic stats: COMPLETE Column stats: COMPLETE
                  File Output Operator
                    compressed: false
                    File Sink Vectorization:
                        className: VectorFileSinkOperator
                        native: false
-                   Statistics: Num rows: 20 Data size: 9800 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 20 Data size: 6820 Basic stats: COMPLETE Column stats: COMPLETE
                    table:
                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/llap/vector_decimal_1.q.out ql/src/test/results/clientpositive/llap/vector_decimal_1.q.out
index 0088d8b177..4f1b509c54 100644
--- ql/src/test/results/clientpositive/llap/vector_decimal_1.q.out
+++ ql/src/test/results/clientpositive/llap/vector_decimal_1.q.out
@@ -58,7 +58,7 @@ STAGE PLANS:
        Map Operator Tree:
            TableScan
              alias: decimal_1
-             Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
              TableScan Vectorization:
                  native: true
                  vectorizationSchemaColumns: [0:t:decimal(4,2), 1:u:decimal(5,0), 2:v:decimal(10,0), 3:ROW__ID:struct]
@@ -70,7 +70,7 @@ STAGE PLANS:
                    native: true
                    projectedOutputColumnNums: [4]
                    selectExpressions: CastDecimalToBoolean(col 0:decimal(4,2)) -> 4:boolean
-               Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                Reduce Output Operator
                  key expressions: _col0 (type: boolean)
                  sort order: +
@@ -80,7 +80,7 @@ STAGE PLANS:
                      native: true
                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                      valueColumnNums: []
-                 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
            Map Vectorization:
@@ -121,13 +121,13 @@ STAGE PLANS:
                    className: VectorSelectOperator
                    native: true
                    projectedOutputColumnNums: [0]
-               Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
                  File Sink Vectorization:
                      className: VectorFileSinkOperator
                      native: false
-                 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -174,7 +174,7 @@ STAGE PLANS:
        Map Operator Tree:
            TableScan
              alias: decimal_1
-             Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
              TableScan Vectorization:
                  native: true
                  vectorizationSchemaColumns: [0:t:decimal(4,2), 1:u:decimal(5,0), 2:v:decimal(10,0), 3:ROW__ID:struct]
@@ -186,7 +186,7 @@ STAGE PLANS:
                    native: true
                    projectedOutputColumnNums: [4]
                    selectExpressions: CastDecimalToLong(col 0:decimal(4,2)) -> 4:tinyint
-               Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                Reduce Output Operator
                  key expressions: _col0 (type: tinyint)
                  sort order: +
@@ -196,7 +196,7 @@ STAGE PLANS:
                      native: true
                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                      valueColumnNums: []
-                 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
            Map Vectorization:
@@ -237,13 +237,13 @@ STAGE PLANS:
                    className: VectorSelectOperator
                    native: true
                    projectedOutputColumnNums: [0]
-               Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
                  File Sink Vectorization:
                      className: VectorFileSinkOperator
                      native: false
-                 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -290,7 +290,7 @@ STAGE PLANS:
        Map Operator Tree:
            TableScan
              alias: decimal_1
-             Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
              TableScan Vectorization:
                  native: true
                  vectorizationSchemaColumns: [0:t:decimal(4,2), 1:u:decimal(5,0), 2:v:decimal(10,0), 3:ROW__ID:struct]
@@ -302,7 +302,7 @@ STAGE PLANS:
                    native: true
                    projectedOutputColumnNums: [4]
                    selectExpressions: CastDecimalToLong(col 0:decimal(4,2)) -> 4:smallint
-               Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                Reduce Output Operator
                  key expressions: _col0 (type: smallint)
                  sort order: +
@@ -312,7 +312,7 @@ STAGE PLANS:
                      native: true
                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                      valueColumnNums: []
-                 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
            Map Vectorization:
@@ -353,13 +353,13 @@ STAGE PLANS:
                    className: VectorSelectOperator
                    native: true
                    projectedOutputColumnNums: [0]
-               Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
                  File Sink Vectorization:
                      className: VectorFileSinkOperator
                      native: false
-                 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -406,7 +406,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: decimal_1
-                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
                   TableScan Vectorization:
                       native: true
                       vectorizationSchemaColumns: [0:t:decimal(4,2), 1:u:decimal(5,0), 2:v:decimal(10,0), 3:ROW__ID:struct]
@@ -418,7 +418,7 @@ STAGE PLANS:
                         native: true
                         projectedOutputColumnNums: [4]
                         selectExpressions: CastDecimalToLong(col 0:decimal(4,2)) -> 4:int
-                    Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                     Reduce Output Operator
                       key expressions: _col0 (type: int)
                       sort order: +
@@ -428,7 +428,7 @@ STAGE PLANS:
                           native: true
                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                           valueColumnNums: []
-                      Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
@@ -469,13 +469,13 @@ STAGE PLANS:
                     className: VectorSelectOperator
                     native: true
                     projectedOutputColumnNums: [0]
-                Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
                   File Sink Vectorization:
                       className: VectorFileSinkOperator
                       native: false
-                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -522,7 +522,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: decimal_1
-                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
                   TableScan Vectorization:
                       native: true
                       vectorizationSchemaColumns: [0:t:decimal(4,2), 1:u:decimal(5,0), 2:v:decimal(10,0), 3:ROW__ID:struct]
@@ -534,7 +534,7 @@ STAGE PLANS:
                         native: true
                         projectedOutputColumnNums: [4]
                         selectExpressions: CastDecimalToLong(col 0:decimal(4,2)) -> 4:bigint
-                    Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                     Reduce Output Operator
                       key expressions: _col0 (type: bigint)
                       sort order: +
@@ -544,7 +544,7 @@ STAGE PLANS:
                           native: true
                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                           valueColumnNums: []
-                      Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
@@ -585,13 +585,13 @@ STAGE PLANS:
                     className: VectorSelectOperator
                     native: true
                     projectedOutputColumnNums: [0]
-                Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
                   File Sink Vectorization:
                       className: VectorFileSinkOperator
                       native: false
-                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -638,7 +638,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: decimal_1
-                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
                   TableScan Vectorization:
                       native: true
                       vectorizationSchemaColumns: [0:t:decimal(4,2), 1:u:decimal(5,0), 2:v:decimal(10,0), 3:ROW__ID:struct]
@@ -650,7 +650,7 @@ STAGE PLANS:
                         native: true
                         projectedOutputColumnNums: [4]
                         selectExpressions: CastDecimalToDouble(col 0:decimal(4,2)) -> 4:float
-                    Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                     Reduce Output Operator
                       key expressions: _col0 (type: float)
                       sort order: +
@@ -660,7 +660,7 @@ STAGE PLANS:
                           native: true
                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                           valueColumnNums: []
-                      Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
@@ -701,13 +701,13 @@ STAGE PLANS:
                     className: VectorSelectOperator
                     native: true
                     projectedOutputColumnNums: [0]
-                Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
                   File Sink Vectorization:
                       className: VectorFileSinkOperator
                       native: false
-                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -754,7 +754,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: decimal_1
-                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
                   TableScan Vectorization:
                       native: true
                       vectorizationSchemaColumns: [0:t:decimal(4,2), 1:u:decimal(5,0), 2:v:decimal(10,0), 3:ROW__ID:struct]
@@ -766,7 +766,7 @@ STAGE PLANS:
                         native: true
                         projectedOutputColumnNums: [4]
                         selectExpressions: CastDecimalToDouble(col 0:decimal(4,2)) -> 4:double
-                    Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                     Reduce Output Operator
                       key expressions: _col0 (type: double)
                       sort order: +
@@ -776,7 +776,7 @@ STAGE PLANS:
                           native: true
                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                           valueColumnNums: []
-                      Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
@@ -817,13 +817,13 @@ STAGE PLANS:
                     className: VectorSelectOperator
                     native: true
                     projectedOutputColumnNums: [0]
-                Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
                   File Sink Vectorization:
                       className: VectorFileSinkOperator
                       native: false
-                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -870,7 +870,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: decimal_1
-                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
                   TableScan Vectorization:
                       native: true
                       vectorizationSchemaColumns: [0:t:decimal(4,2), 1:u:decimal(5,0), 2:v:decimal(10,0), 3:ROW__ID:struct]
@@ -882,7 +882,7 @@ STAGE PLANS:
                         native: true
                         projectedOutputColumnNums: [4]
                         selectExpressions: CastDecimalToString(col 0:decimal(4,2)) -> 4:string
-                    Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
                     Reduce Output Operator
                       key expressions: _col0 (type: string)
                       sort order: +
@@ -892,7 +892,7 @@ STAGE PLANS:
                           native: true
                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                           valueColumnNums: []
-                      Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
@@ -933,13 +933,13 @@ STAGE PLANS:
                     className: VectorSelectOperator
                     native: true
                     projectedOutputColumnNums: [0]
-                Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
                   File Sink Vectorization:
                       className: VectorFileSinkOperator
                       native: false
-                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -986,7 +986,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: decimal_1
-                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
                   TableScan Vectorization:
                       native: true
                       vectorizationSchemaColumns: [0:t:decimal(4,2), 1:u:decimal(5,0), 2:v:decimal(10,0), 3:ROW__ID:struct]
@@ -998,7 +998,7 @@ STAGE PLANS:
                         native: true
                         projectedOutputColumnNums: [4]
                         selectExpressions: CastDecimalToTimestamp(col 0:decimal(4,2)) -> 4:timestamp
-                    Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                     Reduce Output Operator
                       key expressions: _col0 (type: timestamp)
                       sort order: +
@@ -1008,7 +1008,7 @@ STAGE PLANS:
                           native: true
                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                           valueColumnNums: []
-                      Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
@@ -1049,13 +1049,13 @@ STAGE PLANS:
                     className: VectorSelectOperator
                     native: true
                     projectedOutputColumnNums: [0]
-                Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
                   File Sink Vectorization:
                       className: VectorFileSinkOperator
                       native: false
-                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/llap/vector_decimal_2.q.out ql/src/test/results/clientpositive/llap/vector_decimal_2.q.out
index c4efa8c09a..ca13c0617f 100644
--- ql/src/test/results/clientpositive/llap/vector_decimal_2.q.out
+++ ql/src/test/results/clientpositive/llap/vector_decimal_2.q.out
@@ -47,7 +47,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: decimal_2
-                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
                   TableScan Vectorization:
                       native: true
                       vectorizationSchemaColumns: [0:t:decimal(18,9), 1:ROW__ID:struct]
@@ -59,7 +59,7 @@ STAGE PLANS:
                         native: true
                         projectedOutputColumnNums: [2]
                         selectExpressions: CastDecimalToBoolean(col 0:decimal(18,9)) -> 2:boolean
-                    Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                     Reduce Output Operator
                       key expressions: _col0 (type: boolean)
                       sort order: +
@@ -69,7 +69,7 @@ STAGE PLANS:
                           native: true
                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                           valueColumnNums: []
-                      Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
@@ -110,13 +110,13 @@ STAGE PLANS:
                     className: VectorSelectOperator
                     native: true
                     projectedOutputColumnNums: [0]
-                Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
                   File Sink Vectorization:
                       className: VectorFileSinkOperator
                       native: false
-                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -163,7 +163,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: decimal_2
-                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
                   TableScan Vectorization:
                       native: true
                       vectorizationSchemaColumns: [0:t:decimal(18,9), 1:ROW__ID:struct]
@@ -175,7 +175,7 @@ STAGE PLANS:
                         native: true
                         projectedOutputColumnNums: [2]
                         selectExpressions: CastDecimalToLong(col 0:decimal(18,9)) -> 2:tinyint
-                    Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                     Reduce Output Operator
                       key expressions: _col0 (type: tinyint)
                       sort order: +
@@ -185,7 +185,7 @@ STAGE PLANS:
                           native: true
                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                           valueColumnNums: []
-                      Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
@@ -226,13 +226,13 @@ STAGE PLANS:
                     className: VectorSelectOperator
                     native: true
                     projectedOutputColumnNums: [0]
-                Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
                   File Sink Vectorization:
                       className: VectorFileSinkOperator
                       native: false
-                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -279,7 +279,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: decimal_2
-                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
                   TableScan Vectorization:
                       native: true
                       vectorizationSchemaColumns: [0:t:decimal(18,9), 1:ROW__ID:struct]
@@ -291,7 +291,7 @@ STAGE PLANS:
                         native: true
                         projectedOutputColumnNums: [2]
                         selectExpressions: CastDecimalToLong(col 0:decimal(18,9)) -> 2:smallint
-                    Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                     Reduce Output Operator
                       key expressions: _col0 (type: smallint)
                       sort order: +
@@ -301,7 +301,7 @@ STAGE PLANS:
                           native: true
                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                           valueColumnNums: []
-                      Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
@@ -342,13 +342,13 @@ STAGE PLANS:
                     className: VectorSelectOperator
                     native: true
                     projectedOutputColumnNums: [0]
-                Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
                   File Sink Vectorization:
                       className: VectorFileSinkOperator
                       native: false
-                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -395,7 +395,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: decimal_2
-                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
                   TableScan Vectorization:
                       native: true
                       vectorizationSchemaColumns: [0:t:decimal(18,9), 1:ROW__ID:struct]
@@ -407,7 +407,7 @@ STAGE PLANS:
                         native: true
                         projectedOutputColumnNums: [2]
                         selectExpressions: CastDecimalToLong(col 0:decimal(18,9)) -> 2:int
-                    Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                     Reduce Output Operator
                       key expressions: _col0 (type: int)
                       sort order: +
@@ -417,7 +417,7 @@ STAGE PLANS:
                           native: true
                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                           valueColumnNums: []
-                      Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
@@ -458,13 +458,13 @@ STAGE PLANS:
                     className: VectorSelectOperator
                     native: true
                     projectedOutputColumnNums: [0]
-                Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
                   File Sink Vectorization:
                       className: VectorFileSinkOperator
                       native: false
-                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -511,7 +511,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: decimal_2
-                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
                   TableScan Vectorization:
                       native: true
                       vectorizationSchemaColumns: [0:t:decimal(18,9), 1:ROW__ID:struct]
@@ -523,7 +523,7 @@ STAGE PLANS:
                         native: true
                         projectedOutputColumnNums: [2]
                         selectExpressions: CastDecimalToLong(col 0:decimal(18,9)) -> 2:bigint
-                    Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                     Reduce Output Operator
                       key expressions: _col0 (type: bigint)
                       sort order: +
@@ -533,7 +533,7 @@ STAGE PLANS:
                           native: true
                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                           valueColumnNums: []
-                      Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
@@ -574,13 +574,13 @@ STAGE PLANS:
                     className: VectorSelectOperator
                     native: true
                     projectedOutputColumnNums: [0]
-                Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
                   File Sink Vectorization:
                       className: VectorFileSinkOperator
                       native: false
-                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -627,7 +627,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: decimal_2
-                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
                   TableScan Vectorization:
                       native: true
                       vectorizationSchemaColumns: [0:t:decimal(18,9), 1:ROW__ID:struct]
@@ -639,7 +639,7 @@ STAGE PLANS:
                         native: true
                         projectedOutputColumnNums: [2]
                         selectExpressions: CastDecimalToDouble(col 0:decimal(18,9)) -> 2:float
-                    Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                     Reduce Output Operator
                       key expressions: _col0 (type: float)
                       sort order: +
@@ -649,7 +649,7 @@ STAGE PLANS:
                           native: true
                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                           valueColumnNums: []
-                      Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
@@ -690,13 +690,13 @@ STAGE PLANS:
                     className: VectorSelectOperator
                     native: true
                     projectedOutputColumnNums: [0]
-                Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
                   File Sink Vectorization:
                       className: VectorFileSinkOperator
                       native: false
-                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -743,7 +743,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: decimal_2
-                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
                   TableScan Vectorization:
                       native: true
                       vectorizationSchemaColumns: [0:t:decimal(18,9), 1:ROW__ID:struct]
@@ -755,7 +755,7 @@ STAGE PLANS:
                         native: true
                         projectedOutputColumnNums: [2]
                         selectExpressions: CastDecimalToDouble(col 0:decimal(18,9)) -> 2:double
-                    Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                     Reduce Output Operator
                       key expressions: _col0 (type: double)
                       sort order: +
@@ -765,7 +765,7 @@ STAGE PLANS:
                           native: true
                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                           valueColumnNums: []
-                      Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
@@ -806,13 +806,13 @@ STAGE PLANS:
                     className: VectorSelectOperator
                     native: true
                     projectedOutputColumnNums: [0]
-                Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
                   File Sink Vectorization:
                       className: VectorFileSinkOperator
                       native: false
-                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -859,7 +859,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: decimal_2
-                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
                   TableScan Vectorization:
                       native: true
                       vectorizationSchemaColumns: [0:t:decimal(18,9), 1:ROW__ID:struct]
@@ -871,7 +871,7 @@ STAGE PLANS:
                         native: true
                         projectedOutputColumnNums: [2]
                         selectExpressions: CastDecimalToString(col 0:decimal(18,9)) -> 2:string
-                    Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
                     Reduce Output Operator
                       key expressions: _col0 (type: string)
                       sort order: +
@@ -881,7 +881,7 @@ STAGE PLANS:
                           native: true
                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                           valueColumnNums: []
-                      Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
@@ -922,13 +922,13 @@ STAGE PLANS:
                     className: VectorSelectOperator
                     native: true
                     projectedOutputColumnNums: [0]
-                Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
                   File Sink Vectorization:
                       className: VectorFileSinkOperator
                       native: false
-                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -986,7 +986,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: decimal_2
-                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
                   TableScan Vectorization:
                       native: true
                       vectorizationSchemaColumns: [0:t:decimal(18,9), 1:ROW__ID:struct]
@@ -998,7 +998,7 @@ STAGE PLANS:
                         native: true
                         projectedOutputColumnNums: [2]
                         selectExpressions: CastDecimalToBoolean(col 0:decimal(18,9)) -> 2:boolean
-                    Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                     Reduce Output Operator
                       key expressions: _col0 (type: boolean)
                       sort order: +
@@ -1008,7 +1008,7 @@ STAGE PLANS:
                           native: true
                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                           valueColumnNums: []
-                      Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
@@ -1049,13 +1049,13 @@ STAGE PLANS:
                     className: VectorSelectOperator
                     native: true
                     projectedOutputColumnNums: [0]
-                Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
                   File Sink Vectorization:
                       className: VectorFileSinkOperator
                       native: false
-                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1102,7 +1102,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: decimal_2
-                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
                   TableScan Vectorization:
                       native: true
                       vectorizationSchemaColumns: [0:t:decimal(18,9), 1:ROW__ID:struct]
@@ -1114,7 +1114,7 @@ STAGE PLANS:
                         native: true
                         projectedOutputColumnNums: [2]
                         selectExpressions: CastDecimalToLong(col 0:decimal(18,9)) -> 2:tinyint
-                    Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                     Reduce Output Operator
                       key expressions: _col0 (type: tinyint)
                       sort order: +
@@ -1124,7 +1124,7 @@ STAGE PLANS:
                           native: true
                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                           valueColumnNums: []
-                      Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
@@ -1165,13 +1165,13 @@ STAGE PLANS:
                     className: VectorSelectOperator
                     native: true
                     projectedOutputColumnNums: [0]
-                Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
                   File Sink Vectorization:
                       className: VectorFileSinkOperator
                       native: false
-                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1218,7 +1218,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: decimal_2
-                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
                   TableScan Vectorization:
                       native: true
                       vectorizationSchemaColumns: [0:t:decimal(18,9), 1:ROW__ID:struct]
@@ -1230,7 +1230,7 @@ STAGE PLANS:
                         native: true
                         projectedOutputColumnNums: [2]
                         selectExpressions: CastDecimalToLong(col 0:decimal(18,9)) -> 2:smallint
-                    Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                     Reduce Output Operator
                       key expressions: _col0 (type: smallint)
                       sort order: +
@@ -1240,7 +1240,7 @@ STAGE PLANS:
                           native: true
                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                           valueColumnNums: []
-                      Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
@@ -1281,13 +1281,13 @@ STAGE PLANS:
                     className: VectorSelectOperator
                     native: true
                     projectedOutputColumnNums: [0]
-                Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
                   File Sink Vectorization:
                       className: VectorFileSinkOperator
                       native: false
-                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1334,7 +1334,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: decimal_2
-                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
                   TableScan Vectorization:
                       native: true
                       vectorizationSchemaColumns: [0:t:decimal(18,9), 1:ROW__ID:struct]
@@ -1346,7 +1346,7 @@ STAGE PLANS:
                         native: true
                         projectedOutputColumnNums: [2]
                         selectExpressions: CastDecimalToLong(col 0:decimal(18,9)) -> 2:int
-                    Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                     Reduce Output Operator
                       key expressions: _col0 (type: int)
                       sort order: +
@@ -1356,7 +1356,7 @@ STAGE PLANS:
                           native: true
                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                           valueColumnNums: []
-                      Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
@@ -1397,13 +1397,13 @@ STAGE PLANS:
                     className: VectorSelectOperator
                     native: true
                     projectedOutputColumnNums: [0]
-                Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
                   File Sink Vectorization:
                       className: VectorFileSinkOperator
                       native: false
-                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1450,7 +1450,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: decimal_2
-                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
                   TableScan Vectorization:
                       native: true
                       vectorizationSchemaColumns: [0:t:decimal(18,9), 1:ROW__ID:struct]
@@ -1462,7 +1462,7 @@ STAGE PLANS:
                         native: true
                         projectedOutputColumnNums: [2]
                         selectExpressions: CastDecimalToLong(col 0:decimal(18,9)) -> 2:bigint
-                    Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                     Reduce Output Operator
                       key expressions: _col0 (type: bigint)
                       sort order: +
@@ -1472,7 +1472,7 @@ STAGE PLANS:
                           native: true
                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                           valueColumnNums: []
-                      Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
@@ -1513,13 +1513,13 @@ STAGE PLANS:
                     className: VectorSelectOperator
                     native: true
                     projectedOutputColumnNums: [0]
-                Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
                   File Sink Vectorization:
                       className: VectorFileSinkOperator
                       native: false
-                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1566,7 +1566,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: decimal_2
-                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
                   TableScan Vectorization:
                       native: true
                       vectorizationSchemaColumns: [0:t:decimal(18,9), 1:ROW__ID:struct]
@@ -1578,7 +1578,7 @@ STAGE PLANS:
                         native: true
                         projectedOutputColumnNums: [2]
                         selectExpressions: CastDecimalToDouble(col 0:decimal(18,9)) -> 2:float
-                    Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                     Reduce Output Operator
                       key expressions: _col0 (type: float)
                       sort order: +
@@ -1588,7 +1588,7 @@ STAGE PLANS:
                           native: true
                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                           valueColumnNums: []
-                      Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
@@ -1629,13 +1629,13 @@ STAGE PLANS:
                     className: VectorSelectOperator
                     native: true
                     projectedOutputColumnNums: [0]
-                Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
                   File Sink Vectorization:
                       className: VectorFileSinkOperator
                       native: false
-                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1682,7 +1682,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: decimal_2
-                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
                   TableScan Vectorization:
                       native: true
                       vectorizationSchemaColumns: [0:t:decimal(18,9), 1:ROW__ID:struct]
@@ -1694,7 +1694,7 @@ STAGE PLANS:
                         native: true
                         projectedOutputColumnNums: [2]
                         selectExpressions: CastDecimalToDouble(col 0:decimal(18,9)) -> 2:double
-                    Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                     Reduce Output Operator
                       key expressions: _col0 (type: double)
                       sort order: +
@@ -1704,7 +1704,7 @@ STAGE PLANS:
                           native: true
                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                           valueColumnNums: []
-                      Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
@@ -1745,13 +1745,13 @@ STAGE PLANS:
                     className: VectorSelectOperator
                     native: true
                     projectedOutputColumnNums: [0]
-                Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
                   File Sink Vectorization:
                       className: VectorFileSinkOperator
                       native: false
-                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1798,7 +1798,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: decimal_2
-                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
                   TableScan Vectorization:
                       native: true
                       vectorizationSchemaColumns: [0:t:decimal(18,9), 1:ROW__ID:struct]
@@ -1810,7 +1810,7 @@ STAGE PLANS:
                         native: true
                         projectedOutputColumnNums: [2]
                         selectExpressions: CastDecimalToString(col 0:decimal(18,9)) -> 2:string
-                    Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
                     Reduce Output Operator
                       key expressions: _col0 (type: string)
                       sort order: +
@@ -1820,7 +1820,7 @@ STAGE PLANS:
                           native: true
                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                           valueColumnNums: []
-                      Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
@@ -1861,13 +1861,13 @@ STAGE PLANS:
                     className: VectorSelectOperator
                     native: true
                     projectedOutputColumnNums: [0]
-                Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
                   File Sink Vectorization:
                       className: VectorFileSinkOperator
                       native: false
-                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/llap/vector_decimal_6.q.out ql/src/test/results/clientpositive/llap/vector_decimal_6.q.out
index 1a2f453d62..9a6dd5414c 100644
--- ql/src/test/results/clientpositive/llap/vector_decimal_6.q.out
+++ ql/src/test/results/clientpositive/llap/vector_decimal_6.q.out
@@ -132,7 +132,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: decimal_6_1
-                  Statistics: Num rows: 27 Data size: 3132 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 27 Data size: 2684 Basic stats: COMPLETE Column stats: COMPLETE
                   TableScan Vectorization:
                       native: true
                       vectorizationSchemaColumns: [0:key:decimal(10,5), 1:value:int, 2:ROW__ID:struct]
@@ -143,7 +143,7 @@ STAGE PLANS:
                         className: VectorSelectOperator
                         native: true
                         projectedOutputColumnNums: [0, 1]
-                    Statistics: Num rows: 27 Data size: 3132 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 27 Data size: 2684 Basic stats: COMPLETE Column stats: COMPLETE
                     Reduce Output Operator
                       key expressions: _col0 (type: decimal(10,5)), _col1 (type: int)
                       sort order: ++
@@ -153,7 +153,7 @@ STAGE PLANS:
                           native: true
                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                           valueColumnNums: []
-                      Statistics: Num rows: 27 Data size: 3132 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 27 Data size: 2684 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
@@ -194,13 +194,13 @@ STAGE PLANS:
                     className: VectorSelectOperator
                     native: true
                     projectedOutputColumnNums: [0, 1]
-                Statistics: Num rows: 27 Data size: 3132 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 27 Data size: 2684 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
                   File Sink Vectorization:
                       className: VectorFileSinkOperator
                       native: false
-                  Statistics: Num rows: 27 Data size: 3132 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 27 Data size: 2684 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -273,7 +273,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: decimal_6_2
-                  Statistics: Num rows: 27 Data size: 3132 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 27 Data size: 3132 Basic stats: COMPLETE Column stats: COMPLETE
                   TableScan Vectorization:
                       native: true
                       vectorizationSchemaColumns: [0:key:decimal(17,4), 1:value:int, 2:ROW__ID:struct]
@@ -284,7 +284,7 @@ STAGE PLANS:
                         className: VectorSelectOperator
                         native: true
                         projectedOutputColumnNums: [0, 1]
-                    Statistics: Num rows: 27 Data size: 3132 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 27 Data size: 3132 Basic stats: COMPLETE Column stats: COMPLETE
                     Reduce Output Operator
                       key expressions: _col0 (type: decimal(17,4)), _col1 (type: int)
                       sort order: ++
@@ -294,7 +294,7 @@ STAGE PLANS:
                           native: true
                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                           valueColumnNums: []
-                      Statistics: Num rows: 27 Data size: 3132 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 27 Data size: 3132 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
@@ -335,13 +335,13 @@ STAGE PLANS:
                     className: VectorSelectOperator
                     native: true
                     projectedOutputColumnNums: [0, 1]
-                Statistics: Num rows: 27 Data size: 3132 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 27 Data size: 3132 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
                   File Sink Vectorization:
                       className: VectorFileSinkOperator
                       native: false
-                  Statistics: Num rows: 27 Data size: 3132 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 27 Data size: 3132 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -424,7 +424,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: decimal_6_1
-                  Statistics: Num rows: 27 Data size: 3024 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 27 Data size: 2576 Basic stats: COMPLETE Column stats: COMPLETE
                   TableScan Vectorization:
                       native: true
                       vectorizationSchemaColumns: [0:key:decimal(10,5), 1:value:int, 2:ROW__ID:struct]
@@ -436,7 +436,7 @@ STAGE PLANS:
                         native: true
                         projectedOutputColumnNums: [3]
                         selectExpressions: CastDecimalToDecimal(col 0:decimal(10,5)) -> 3:decimal(18,5)
-                    Statistics: Num rows: 27 Data size: 3024 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 27 Data size: 2576 Basic stats: COMPLETE Column stats: COMPLETE
                     Reduce Output Operator
                       key expressions: _col0 (type: decimal(18,5))
                       sort order: +
@@ -446,7 +446,7 @@ STAGE PLANS:
                           native: true
                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                           valueColumnNums: []
-                      Statistics: Num rows: 54 Data size: 6048 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 54 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
@@ -468,7 +468,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: decimal_6_2
-                  Statistics: Num rows: 27 Data size: 3024 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 27 Data size: 3024 Basic stats: COMPLETE Column stats: COMPLETE
                   TableScan Vectorization:
                       native: true
                       vectorizationSchemaColumns: [0:key:decimal(17,4), 1:value:int, 2:ROW__ID:struct]
@@ -480,7 +480,7 @@ STAGE PLANS:
                         native: true
                         projectedOutputColumnNums: [3]
                         selectExpressions: CastDecimalToDecimal(col 0:decimal(17,4)) -> 3:decimal(18,5)
-                    Statistics: Num rows: 27 Data size: 3024 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 27 Data size: 3024 Basic stats: COMPLETE Column stats: COMPLETE
                     Reduce Output Operator
                       key expressions: _col0 (type: decimal(18,5))
                       sort order: +
@@ -490,7 +490,7 @@ STAGE PLANS:
                           native: true
                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                           valueColumnNums: []
-                      Statistics: Num rows: 54 Data size: 6048 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 54 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
@@ -531,13 +531,13 @@ STAGE PLANS:
                     className: VectorSelectOperator
                     native: true
                     projectedOutputColumnNums: [0]
-                Statistics: Num rows: 54 Data size: 6048 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 54 Data size: 4928 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
                   File Sink Vectorization:
                       className: VectorFileSinkOperator
                       native: false
-                  Statistics: Num rows: 54 Data size: 6048 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 54 Data size: 4928 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -652,7 +652,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: decimal_6_1
-                  Statistics: Num rows: 27 Data size: 3132 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 27 Data size: 2684 Basic stats: COMPLETE Column stats: COMPLETE
                   TableScan Vectorization:
                       native: true
                       vectorizationSchemaColumns: [0:key:decimal(10,5), 1:value:int, 2:ROW__ID:struct]
@@ -664,7 +664,7 @@ STAGE PLANS:
                         native: true
                         projectedOutputColumnNums: [3, 4]
                         selectExpressions: DecimalColAddDecimalScalar(col 0:decimal(10,5), val 5.5) -> 3:decimal(11,5), LongColMultiplyLongScalar(col 1:int, val 11) -> 4:int
-                    Statistics: Num rows: 27 Data size: 3132 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 27 Data size: 3132 Basic stats: COMPLETE Column stats: COMPLETE
                     Reduce Output Operator
                       key expressions: _col1 (type: int)
                       sort order: +
@@ -674,7 +674,7 @@ STAGE PLANS:
                           native: true
                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                           valueColumnNums: [3]
-                      Statistics: Num rows: 27 Data size: 3132 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 27 Data size: 3132 Basic stats: COMPLETE Column stats: COMPLETE
                       value expressions: _col0 (type: decimal(11,5))
             Execution mode: vectorized, llap
             LLAP IO: all inputs
@@ -716,13 +716,13 @@ STAGE PLANS:
                     className: VectorSelectOperator
                     native: true
                     projectedOutputColumnNums: [1, 0]
-                Statistics: Num rows: 27 Data size: 3132 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 27 Data size: 3132 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
                   File Sink Vectorization:
                       className: VectorFileSinkOperator
                       native: false
-                  Statistics: Num rows: 27 Data size: 3132 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 27 Data size: 3132 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                       output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
diff --git ql/src/test/results/clientpositive/llap/vector_decimal_mapjoin.q.out ql/src/test/results/clientpositive/llap/vector_decimal_mapjoin.q.out
index 3c302b62f7..f94f1c120e 100644
--- ql/src/test/results/clientpositive/llap/vector_decimal_mapjoin.q.out
+++ ql/src/test/results/clientpositive/llap/vector_decimal_mapjoin.q.out
@@ -98,7 +98,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: t1
-                  Statistics: Num rows: 1049 Data size: 111776 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE
                   TableScan Vectorization:
                       native: true
                       vectorizationSchemaColumns: [0:dec:decimal(22,2), 1:ROW__ID:struct]
@@ -108,7 +108,7 @@ STAGE PLANS:
                         native: true
                         predicateExpression: SelectColumnIsNotNull(col 0:decimal(22,2))
                     predicate: dec is not null (type: boolean)
-                    Statistics: Num rows: 997 Data size: 106235 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: dec (type: decimal(22,2))
                       outputColumnNames: _col0
@@ -116,7 +116,7 @@ STAGE PLANS:
                           className: VectorSelectOperator
                           native: true
                           projectedOutputColumnNums: [0]
-                      Statistics: Num rows: 997 Data size: 106235 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE
                       Map Join Operator
                         condition map:
                              Inner Join 0 to 1
@@ -132,13 +132,13 @@ STAGE PLANS:
                        outputColumnNames: _col0, _col1
                        input vertices:
                          1 Map 2
-                       Statistics: Num rows: 1096 Data size: 116858 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 1101 Data size: 246624 Basic stats: COMPLETE Column stats: COMPLETE
                        File Output Operator
                          compressed: false
                          File Sink Vectorization:
                              className: VectorFileSinkOperator
                              native: false
-                         Statistics: Num rows: 1096 Data size: 116858 Basic stats: COMPLETE Column stats: NONE
+                         Statistics: Num rows: 1101 Data size: 246624 Basic stats: COMPLETE Column stats: COMPLETE
                          table:
                              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -164,7 +164,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: t2
-                  Statistics: Num rows: 1049 Data size: 111776 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE
                   TableScan Vectorization:
                       native: true
                       vectorizationSchemaColumns: [0:dec:decimal(24,0), 1:ROW__ID:struct]
@@ -174,7 +174,7 @@ STAGE PLANS:
                         native: true
                         predicateExpression: SelectColumnIsNotNull(col 0:decimal(24,0))
                     predicate: dec is not null (type: boolean)
-                    Statistics: Num rows: 997 Data size: 106235 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: dec (type: decimal(24,0))
                      outputColumnNames: _col0
@@ -182,7 +182,7 @@ STAGE PLANS:
                           className: VectorSelectOperator
                           native: true
                           projectedOutputColumnNums: [0]
-                      Statistics: Num rows: 997 Data size: 106235 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: decimal(26,2))
                         sort order: +
@@ -193,7 +193,7 @@ STAGE PLANS:
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                             valueColumnNums: []
-                        Statistics: Num rows: 997 Data size: 106235 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
diff --git ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out
index c40718b6a0..2fc5277fd6 100644
--- ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out
+++ ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out
@@ -573,7 +573,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: decimal_precision
-                  Statistics: Num rows: 75 Data size: 8176 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 75 Data size: 3584 Basic stats: COMPLETE Column stats: COMPLETE
                   TableScan Vectorization:
                       native: true
                       vectorizationSchemaColumns: [0:dec:decimal(20,10), 1:ROW__ID:struct]
@@ -584,7 +584,7 @@ STAGE PLANS:
                         className: VectorSelectOperator
                         native: true
                         projectedOutputColumnNums: [0]
-                    Statistics: Num rows: 75 Data size: 8176 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 75 Data size: 3584 Basic stats: COMPLETE Column stats: COMPLETE
                     Group By Operator
                       aggregations: avg(dec), sum(dec)
                       Group By Vectorization:
@@ -596,7 +596,7 @@ STAGE PLANS:
                           projectedOutputColumnNums: [0, 1]
                       mode: hash
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 1 Data size: 512 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         sort order:
                         Reduce Sink Vectorization:
@@ -605,7 +605,7 @@ STAGE PLANS:
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                            valueColumnNums: [0, 1]
-                        Statistics: Num rows: 1 Data size: 512 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE
                         value expressions: _col0 (type: struct), _col1 (type: decimal(30,10))
             Execution mode: vectorized, llap
             LLAP IO: all inputs
@@ -651,13 +651,13 @@ STAGE PLANS:
-651,13 +651,13 @@ STAGE PLANS: projectedOutputColumnNums: [0, 1] mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 512 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 512 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/vector_decimal_round.q.out ql/src/test/results/clientpositive/llap/vector_decimal_round.q.out index 938e47d22b..523d59ff8d 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_round.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_round.q.out @@ -54,7 +54,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_tbl_txt - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:dec:decimal(10,0), 1:ROW__ID:struct] @@ -66,7 +66,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0, 2] selectExpressions: FuncRoundWithNumDigitsDecimalToDecimal(col 0:decimal(10,0), decimalPlaces -1) -> 2:decimal(11,0) - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: decimal(10,0)) sort order: + @@ -76,7 +76,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [2] - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(11,0)) Execution mode: vectorized, llap LLAP IO: no inputs @@ -119,13 +119,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -172,7 +172,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_tbl_txt - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:dec:decimal(10,0), 1:ROW__ID:struct] @@ -184,7 +184,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: 
[0, 2] selectExpressions: FuncRoundWithNumDigitsDecimalToDecimal(col 0:decimal(10,0), decimalPlaces -1) -> 2:decimal(11,0) - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: decimal(11,0)) sort order: + @@ -194,7 +194,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [0] - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: decimal(10,0)) Execution mode: vectorized, llap LLAP IO: no inputs @@ -237,13 +237,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [1, 0] - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -316,7 +316,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_tbl_rc - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:dec:decimal(10,0), 1:ROW__ID:struct] @@ -328,7 +328,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0, 2] selectExpressions: FuncRoundWithNumDigitsDecimalToDecimal(col 0:decimal(10,0), decimalPlaces -1) -> 2:decimal(11,0) - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: decimal(10,0)) sort order: + @@ -338,7 +338,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [2] - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(11,0)) Execution mode: vectorized, llap LLAP IO: no inputs @@ -380,13 +380,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num 
rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -433,7 +433,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_tbl_rc - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:dec:decimal(10,0), 1:ROW__ID:struct] @@ -445,7 +445,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0, 2] selectExpressions: FuncRoundWithNumDigitsDecimalToDecimal(col 0:decimal(10,0), decimalPlaces -1) -> 2:decimal(11,0) - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: decimal(11,0)) sort order: + @@ -455,7 +455,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [0] - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: decimal(10,0)) Execution mode: vectorized, llap LLAP IO: no inputs @@ -497,13 +497,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [1, 0] - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -576,7 +576,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_tbl_orc - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:dec:decimal(10,0), 1:ROW__ID:struct] @@ -588,7 +588,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0, 2] selectExpressions: FuncRoundWithNumDigitsDecimalToDecimal(col 0:decimal(10,0), decimalPlaces -1) -> 2:decimal(11,0) - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: decimal(10,0)) sort order: + @@ -598,7 +598,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [2] - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 
Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(11,0)) Execution mode: vectorized, llap LLAP IO: all inputs @@ -640,13 +640,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -693,7 +693,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_tbl_orc - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:dec:decimal(10,0), 1:ROW__ID:struct] @@ -705,7 +705,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0, 2] selectExpressions: FuncRoundWithNumDigitsDecimalToDecimal(col 0:decimal(10,0), decimalPlaces -1) -> 2:decimal(11,0) - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: decimal(11,0)) sort order: + @@ -715,7 +715,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [0] - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: decimal(10,0)) Execution mode: vectorized, llap LLAP IO: all inputs @@ -757,13 +757,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [1, 0] - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/vector_decimal_round_2.q.out ql/src/test/results/clientpositive/llap/vector_decimal_round_2.q.out index ebc3642b20..b6eac0c670 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_round_2.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_round_2.q.out @@ -58,7 +58,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_tbl_1_orc - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE TableScan 
Vectorization: native: true vectorizationSchemaColumns: [0:dec:decimal(38,18), 1:ROW__ID:struct] @@ -70,7 +70,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] selectExpressions: FuncRoundDecimalToDecimal(col 0:decimal(38,18)) -> 2:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0:decimal(38,18), decimalPlaces 0) -> 3:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0:decimal(38,18), decimalPlaces 1) -> 4:decimal(22,1), FuncRoundWithNumDigitsDecimalToDecimal(col 0:decimal(38,18), decimalPlaces 2) -> 5:decimal(23,2), FuncRoundWithNumDigitsDecimalToDecimal(col 0:decimal(38,18), decimalPlaces 3) -> 6:decimal(24,3), FuncRoundWithNumDigitsDecimalToDecimal(col 0:decimal(38,18), decimalPlaces -1) -> 7:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0:decimal(38,18), decimalPlaces -2) -> 8:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0:decimal(38,18), decimalPlaces -3) -> 9:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0:decimal(38,18), decimalPlaces -4) -> 10:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0:decimal(38,18), decimalPlaces -5) -> 11:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0:decimal(38,18), decimalPlaces -6) -> 12:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0:decimal(38,18), decimalPlaces -7) -> 13:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0:decimal(38,18), decimalPlaces -8) -> 14:decimal(21,0) - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: decimal(21,0)) sort order: + @@ -80,7 +80,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(21,0)), _col2 (type: decimal(22,1)), _col3 (type: decimal(23,2)), _col4 (type: decimal(24,3)), _col5 (type: decimal(21,0)), _col6 (type: decimal(21,0)), _col7 (type: decimal(21,0)), _col8 (type: decimal(21,0)), _col9 (type: decimal(21,0)), _col10 (type: decimal(21,0)), _col11 (type: decimal(21,0)), _col12 (type: decimal(21,0)) Execution mode: vectorized, llap LLAP IO: all inputs @@ -122,13 +122,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -224,7 +224,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_tbl_2_orc - Statistics: Num rows: 1 Data size: 
224 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:pos:decimal(38,18), 1:neg:decimal(38,18), 2:ROW__ID:struct] @@ -236,7 +236,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] selectExpressions: FuncRoundDecimalToDecimal(col 0:decimal(38,18)) -> 3:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0:decimal(38,18), decimalPlaces 0) -> 4:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0:decimal(38,18), decimalPlaces 1) -> 5:decimal(22,1), FuncRoundWithNumDigitsDecimalToDecimal(col 0:decimal(38,18), decimalPlaces 2) -> 6:decimal(23,2), FuncRoundWithNumDigitsDecimalToDecimal(col 0:decimal(38,18), decimalPlaces 3) -> 7:decimal(24,3), FuncRoundWithNumDigitsDecimalToDecimal(col 0:decimal(38,18), decimalPlaces 4) -> 8:decimal(25,4), FuncRoundWithNumDigitsDecimalToDecimal(col 0:decimal(38,18), decimalPlaces -1) -> 9:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0:decimal(38,18), decimalPlaces -2) -> 10:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0:decimal(38,18), decimalPlaces -3) -> 11:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0:decimal(38,18), decimalPlaces -4) -> 12:decimal(21,0), FuncRoundDecimalToDecimal(col 1:decimal(38,18)) -> 13:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 1:decimal(38,18), decimalPlaces 0) -> 14:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 1:decimal(38,18), decimalPlaces 1) -> 15:decimal(22,1), FuncRoundWithNumDigitsDecimalToDecimal(col 1:decimal(38,18), decimalPlaces 2) -> 16:decimal(23,2), FuncRoundWithNumDigitsDecimalToDecimal(col 1:decimal(38,18), decimalPlaces 3) -> 17:decimal(24,3), FuncRoundWithNumDigitsDecimalToDecimal(col 1:decimal(38,18), decimalPlaces 4) -> 18:decimal(25,4), FuncRoundWithNumDigitsDecimalToDecimal(col 1:decimal(38,18), decimalPlaces -1) -> 19:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 1:decimal(38,18), decimalPlaces -2) -> 20:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 1:decimal(38,18), decimalPlaces -3) -> 21:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 1:decimal(38,18), decimalPlaces -4) -> 22:decimal(21,0) - Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2240 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: decimal(21,0)) sort order: + @@ -246,7 +246,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] - Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2240 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(21,0)), _col2 (type: decimal(22,1)), _col3 (type: decimal(23,2)), _col4 (type: decimal(24,3)), _col5 (type: decimal(25,4)), _col6 (type: decimal(21,0)), _col7 (type: decimal(21,0)), _col8 (type: decimal(21,0)), _col9 (type: decimal(21,0)), _col10 (type: decimal(21,0)), _col11 (type: decimal(21,0)), _col12 (type: decimal(22,1)), _col13 (type: decimal(23,2)), 
_col14 (type: decimal(24,3)), _col15 (type: decimal(25,4)), _col16 (type: decimal(21,0)), _col17 (type: decimal(21,0)), _col18 (type: decimal(21,0)), _col19 (type: decimal(21,0)) Execution mode: vectorized, llap LLAP IO: all inputs @@ -288,13 +288,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] - Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2240 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2240 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -417,7 +417,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_tbl_3_orc - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:dec:decimal(38,18), 1:ROW__ID:struct] @@ -429,7 +429,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34] selectExpressions: FuncRoundWithNumDigitsDecimalToDecimal(col 0:decimal(38,18), decimalPlaces -15) -> 2:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0:decimal(38,18), decimalPlaces -16) -> 3:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0:decimal(38,18), decimalPlaces -13) -> 4:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0:decimal(38,18), decimalPlaces -14) -> 5:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0:decimal(38,18), decimalPlaces -11) -> 6:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0:decimal(38,18), decimalPlaces -12) -> 7:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0:decimal(38,18), decimalPlaces -9) -> 8:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0:decimal(38,18), decimalPlaces -10) -> 9:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0:decimal(38,18), decimalPlaces -7) -> 10:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0:decimal(38,18), decimalPlaces -8) -> 11:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0:decimal(38,18), decimalPlaces -5) -> 12:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0:decimal(38,18), decimalPlaces -6) -> 13:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0:decimal(38,18), decimalPlaces -3) -> 14:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0:decimal(38,18), decimalPlaces -4) -> 15:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0:decimal(38,18), decimalPlaces -1) -> 16:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0:decimal(38,18), decimalPlaces -2) -> 17:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0:decimal(38,18), decimalPlaces 0) -> 18:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0:decimal(38,18), decimalPlaces 1) -> 19:decimal(22,1), FuncRoundWithNumDigitsDecimalToDecimal(col 0:decimal(38,18), decimalPlaces 2) -> 20:decimal(23,2), FuncRoundWithNumDigitsDecimalToDecimal(col 0:decimal(38,18), decimalPlaces 
3) -> 21:decimal(24,3), FuncRoundWithNumDigitsDecimalToDecimal(col 0:decimal(38,18), decimalPlaces 4) -> 22:decimal(25,4), FuncRoundWithNumDigitsDecimalToDecimal(col 0:decimal(38,18), decimalPlaces 5) -> 23:decimal(26,5), FuncRoundWithNumDigitsDecimalToDecimal(col 0:decimal(38,18), decimalPlaces 6) -> 24:decimal(27,6), FuncRoundWithNumDigitsDecimalToDecimal(col 0:decimal(38,18), decimalPlaces 7) -> 25:decimal(28,7), FuncRoundWithNumDigitsDecimalToDecimal(col 0:decimal(38,18), decimalPlaces 8) -> 26:decimal(29,8), FuncRoundWithNumDigitsDecimalToDecimal(col 0:decimal(38,18), decimalPlaces 9) -> 27:decimal(30,9), FuncRoundWithNumDigitsDecimalToDecimal(col 0:decimal(38,18), decimalPlaces 10) -> 28:decimal(31,10), FuncRoundWithNumDigitsDecimalToDecimal(col 0:decimal(38,18), decimalPlaces 11) -> 29:decimal(32,11), FuncRoundWithNumDigitsDecimalToDecimal(col 0:decimal(38,18), decimalPlaces 12) -> 30:decimal(33,12), FuncRoundWithNumDigitsDecimalToDecimal(col 0:decimal(38,18), decimalPlaces 13) -> 31:decimal(34,13), FuncRoundWithNumDigitsDecimalToDecimal(col 0:decimal(38,18), decimalPlaces 14) -> 32:decimal(35,14), FuncRoundWithNumDigitsDecimalToDecimal(col 0:decimal(38,18), decimalPlaces 15) -> 33:decimal(36,15), FuncRoundWithNumDigitsDecimalToDecimal(col 0:decimal(38,18), decimalPlaces 16) -> 34:decimal(37,16) - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 3808 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: decimal(21,0)) sort order: + @@ -439,7 +439,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34] - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 3808 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(21,0)), _col2 (type: decimal(21,0)), _col3 (type: decimal(21,0)), _col4 (type: decimal(21,0)), _col5 (type: decimal(21,0)), _col6 (type: decimal(21,0)), _col7 (type: decimal(21,0)), _col8 (type: decimal(21,0)), _col9 (type: decimal(21,0)), _col10 (type: decimal(21,0)), _col11 (type: decimal(21,0)), _col12 (type: decimal(21,0)), _col13 (type: decimal(21,0)), _col14 (type: decimal(21,0)), _col15 (type: decimal(21,0)), _col16 (type: decimal(21,0)), _col17 (type: decimal(22,1)), _col18 (type: decimal(23,2)), _col19 (type: decimal(24,3)), _col20 (type: decimal(25,4)), _col21 (type: decimal(26,5)), _col22 (type: decimal(27,6)), _col23 (type: decimal(28,7)), _col24 (type: decimal(29,8)), _col25 (type: decimal(30,9)), _col26 (type: decimal(31,10)), _col27 (type: decimal(32,11)), _col28 (type: decimal(33,12)), _col29 (type: decimal(34,13)), _col31 (type: decimal(35,14)), _col32 (type: decimal(36,15)), _col33 (type: decimal(37,16)) Execution mode: vectorized, llap LLAP IO: all inputs @@ -481,13 +481,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 29, 30, 31, 32] - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 
Data size: 3808 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 3808 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -599,7 +599,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_tbl_4_orc - Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:pos:decimal(38,18), 1:neg:decimal(38,18), 2:ROW__ID:struct] @@ -611,7 +611,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [3, 4] selectExpressions: FuncRoundWithNumDigitsDecimalToDecimal(col 0:decimal(38,18), decimalPlaces 9) -> 3:decimal(30,9), FuncRoundWithNumDigitsDecimalToDecimal(col 1:decimal(38,18), decimalPlaces 9) -> 4:decimal(30,9) - Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: decimal(30,9)) sort order: + @@ -621,7 +621,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [4] - Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(30,9)) Execution mode: vectorized, llap LLAP IO: all inputs @@ -664,13 +664,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0, 1, 2, 3] selectExpressions: ConstantVectorExpression(val 1809242.315111134) -> 2:decimal(17,9), ConstantVectorExpression(val -1809242.315111134) -> 3:decimal(17,9) - Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/vector_decimal_trailing.q.out ql/src/test/results/clientpositive/llap/vector_decimal_trailing.q.out index 25c7210d01..dedad2422f 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_trailing.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_trailing.q.out @@ -91,7 +91,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_trailing - Statistics: Num rows: 30 Data size: 6840 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 4936 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:id:int, 1:a:decimal(10,4), 2:b:decimal(15,8), 
3:ROW__ID:struct] @@ -102,7 +102,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2] - Statistics: Num rows: 30 Data size: 6840 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 4936 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -112,7 +112,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [1, 2] - Statistics: Num rows: 30 Data size: 6840 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 4936 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(10,4)), _col2 (type: decimal(15,8)) Execution mode: vectorized, llap LLAP IO: all inputs @@ -154,13 +154,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2] - Statistics: Num rows: 30 Data size: 6840 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 4936 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 30 Data size: 6840 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 4936 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out index ea01380a1d..56248d1297 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out @@ -71,7 +71,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:decimal(20,10), 1:value:int, 2:ROW__ID:struct] @@ -83,13 +83,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [3] selectExpressions: DecimalColAddDecimalColumn(col 0:decimal(20,10), col 0:decimal(20,10)) -> 3:decimal(21,10) - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -187,7 +187,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:decimal(20,10), 1:value:int, 
2:ROW__ID:struct] @@ -199,13 +199,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [4] selectExpressions: DecimalColAddDecimalColumn(col 0:decimal(20,10), col 3:decimal(10,0))(children: CastLongToDecimal(col 1:int) -> 3:decimal(10,0)) -> 4:decimal(21,10) - Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -303,7 +303,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:decimal(20,10), 1:value:int, 2:ROW__ID:struct] @@ -315,13 +315,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [4] selectExpressions: DoubleColAddDoubleColumn(col 3:double, col 5:double)(children: CastDecimalToDouble(col 0:decimal(20,10)) -> 3:double, DoubleColDivideDoubleScalar(col 4:double, val 2.0)(children: CastLongToDouble(col 1:int) -> 4:double) -> 5:double) -> 4:double - Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -419,7 +419,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:decimal(20,10), 1:value:int, 2:ROW__ID:struct] @@ -431,13 +431,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [4] selectExpressions: DoubleColAddDoubleScalar(col 3:double, val 1.0)(children: CastDecimalToDouble(col 0:decimal(20,10)) -> 3:double) -> 4:double - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -535,7 +535,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num 
rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:decimal(20,10), 1:value:int, 2:ROW__ID:struct] @@ -547,13 +547,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [3] selectExpressions: DecimalColSubtractDecimalColumn(col 0:decimal(20,10), col 0:decimal(20,10)) -> 3:decimal(21,10) - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -651,7 +651,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:decimal(20,10), 1:value:int, 2:ROW__ID:struct] @@ -663,13 +663,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [4] selectExpressions: DecimalColSubtractDecimalColumn(col 0:decimal(20,10), col 3:decimal(10,0))(children: CastLongToDecimal(col 1:int) -> 3:decimal(10,0)) -> 4:decimal(21,10) - Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -767,7 +767,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:decimal(20,10), 1:value:int, 2:ROW__ID:struct] @@ -779,13 +779,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [4] selectExpressions: DoubleColSubtractDoubleColumn(col 3:double, col 5:double)(children: CastDecimalToDouble(col 0:decimal(20,10)) -> 3:double, DoubleColDivideDoubleScalar(col 4:double, val 2.0)(children: CastLongToDouble(col 1:int) -> 4:double) -> 5:double) -> 4:double - Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -883,7 +883,7 @@ STAGE 
PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:decimal(20,10), 1:value:int, 2:ROW__ID:struct] @@ -895,13 +895,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [4] selectExpressions: DoubleColSubtractDoubleScalar(col 3:double, val 1.0)(children: CastDecimalToDouble(col 0:decimal(20,10)) -> 3:double) -> 4:double - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -999,7 +999,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:decimal(20,10), 1:value:int, 2:ROW__ID:struct] @@ -1011,13 +1011,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [3] selectExpressions: DecimalColMultiplyDecimalColumn(col 0:decimal(20,10), col 0:decimal(20,10)) -> 3:decimal(38,17) - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1115,7 +1115,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:decimal(20,10), 1:value:int, 2:ROW__ID:struct] @@ -1125,7 +1125,7 @@ STAGE PLANS: native: true predicateExpression: FilterDecimalColGreaterDecimalScalar(col 4:decimal(31,10), val 0)(children: DecimalColMultiplyDecimalColumn(col 0:decimal(20,10), col 3:decimal(10,0))(children: CastLongToDecimal(col 1:int) -> 3:decimal(10,0)) -> 4:decimal(31,10)) predicate: ((key * CAST( value AS decimal(10,0))) > 0) (type: boolean) - Statistics: Num rows: 12 Data size: 1392 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 1392 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: decimal(20,10)), value (type: int) outputColumnNames: _col0, _col1 @@ -1133,13 +1133,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 12 Data size: 1392 Basic stats: COMPLETE Column 
stats: NONE + Statistics: Num rows: 12 Data size: 1392 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 12 Data size: 1392 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 1392 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1222,7 +1222,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:decimal(20,10), 1:value:int, 2:ROW__ID:struct] @@ -1234,13 +1234,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [4] selectExpressions: DecimalColMultiplyDecimalColumn(col 0:decimal(20,10), col 3:decimal(10,0))(children: CastLongToDecimal(col 1:int) -> 3:decimal(10,0)) -> 4:decimal(31,10) - Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1338,7 +1338,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:decimal(20,10), 1:value:int, 2:ROW__ID:struct] @@ -1350,13 +1350,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [4] selectExpressions: DoubleColMultiplyDoubleColumn(col 3:double, col 5:double)(children: CastDecimalToDouble(col 0:decimal(20,10)) -> 3:double, DoubleColDivideDoubleScalar(col 4:double, val 2.0)(children: CastLongToDouble(col 1:int) -> 4:double) -> 5:double) -> 4:double - Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1454,7 +1454,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:decimal(20,10), 1:value:int, 2:ROW__ID:struct] @@ -1466,13 +1466,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [4] selectExpressions: 
DoubleColMultiplyDoubleScalar(col 3:double, val 2.0)(children: CastDecimalToDouble(col 0:decimal(20,10)) -> 3:double) -> 4:double - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1570,7 +1570,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:decimal(20,10), 1:value:int, 2:ROW__ID:struct] @@ -1582,19 +1582,19 @@ STAGE PLANS: native: true projectedOutputColumnNums: [3] selectExpressions: DecimalColDivideDecimalScalar(col 0:decimal(20,10), val 0) -> 3:decimal(22,12) - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1655,7 +1655,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:decimal(20,10), 1:value:int, 2:ROW__ID:struct] @@ -1665,7 +1665,7 @@ STAGE PLANS: native: true predicateExpression: FilterDecimalColNotEqualDecimalScalar(col 0:decimal(20,10), val 0) predicate: (key <> 0) (type: boolean) - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (key / key) (type: decimal(38,18)) outputColumnNames: _col0 @@ -1674,13 +1674,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [3] selectExpressions: DecimalColDivideDecimalColumn(col 0:decimal(20,10), col 0:decimal(20,10)) -> 3:decimal(38,18) - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic 
stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1774,7 +1774,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:decimal(20,10), 1:value:int, 2:ROW__ID:struct] @@ -1784,7 +1784,7 @@ STAGE PLANS: native: true predicateExpression: FilterLongColNotEqualLongScalar(col 1:int, val 0) predicate: (value <> 0) (type: boolean) - Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (key / CAST( value AS decimal(10,0))) (type: decimal(31,21)) outputColumnNames: _col0 @@ -1793,13 +1793,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [4] selectExpressions: DecimalColDivideDecimalColumn(col 0:decimal(20,10), col 3:decimal(10,0))(children: CastLongToDecimal(col 1:int) -> 3:decimal(10,0)) -> 4:decimal(31,21) - Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1883,7 +1883,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:decimal(20,10), 1:value:int, 2:ROW__ID:struct] @@ -1893,7 +1893,7 @@ STAGE PLANS: native: true predicateExpression: FilterLongColNotEqualLongScalar(col 1:int, val 0) predicate: (value <> 0) (type: boolean) - Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (UDFToDouble(key) / (UDFToDouble(value) / 2.0)) (type: double) outputColumnNames: _col0 @@ -1902,13 +1902,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [4] selectExpressions: DoubleColDivideDoubleColumn(col 3:double, col 5:double)(children: CastDecimalToDouble(col 0:decimal(20,10)) -> 3:double, DoubleColDivideDoubleScalar(col 4:double, val 2.0)(children: CastLongToDouble(col 1:int) -> 4:double) -> 5:double) -> 4:double - Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat 
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1992,7 +1992,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: decimal_udf
- Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:key:decimal(20,10), 1:value:int, 2:ROW__ID:struct]
@@ -2004,13 +2004,13 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [3]
selectExpressions: DoubleScalarAddDoubleColumn(val 1.0, col 4:double)(children: DoubleColDivideDoubleScalar(col 3:double, val 2.0)(children: CastDecimalToDouble(col 0:decimal(20,10)) -> 3:double) -> 4:double) -> 3:double
- Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -2108,7 +2108,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: decimal_udf
- Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:key:decimal(20,10), 1:value:int, 2:ROW__ID:struct]
@@ -2120,13 +2120,13 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [3]
selectExpressions: FuncAbsDecimalToDecimal(col 0:decimal(20,10)) -> 3:decimal(20,10)
- Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -2228,7 +2228,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: decimal_udf
- Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:key:decimal(20,10), 1:value:int, 2:ROW__ID:struct]
@@ -2239,7 +2239,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1]
- Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: sum(key), count(key), avg(key)
Group By Vectorization:
@@ -2253,7 +2253,7 @@ STAGE PLANS:
keys: value (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 17 Data size: 7004 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
@@ -2264,7 +2264,7 @@ STAGE PLANS:
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
valueColumnNums: [1, 2, 3]
- Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 17 Data size: 7004 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: decimal(30,10)), _col2 (type: bigint), _col3 (type: struct)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -2312,7 +2312,7 @@ STAGE PLANS:
keys: KEY._col0 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 19 Data size: 2204 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 17 Data size: 4012 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: int), (_col1 / CAST( _col2 AS decimal(19,0))) (type: decimal(38,18)), _col3 (type: decimal(24,14)), _col1 (type: decimal(30,10))
outputColumnNames: _col0, _col1, _col2, _col3
@@ -2321,7 +2321,7 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [0, 5, 3, 1]
selectExpressions: DecimalColDivideDecimalColumn(col 1:decimal(30,10), col 4:decimal(19,0))(children: CastLongToDecimal(col 2:bigint) -> 4:decimal(19,0)) -> 5:decimal(38,18)
- Statistics: Num rows: 19 Data size: 2204 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 17 Data size: 5780 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
@@ -2331,7 +2331,7 @@ STAGE PLANS:
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
valueColumnNums: [5, 3, 1]
- Statistics: Num rows: 19 Data size: 2204 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 17 Data size: 5780 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: decimal(38,18)), _col2 (type: decimal(24,14)), _col3 (type: decimal(30,10))
Reducer 3
Execution mode: vectorized, llap
@@ -2356,13 +2356,13 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 2, 3]
- Statistics: Num rows: 19 Data size: 2204 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 17 Data size: 5780 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 19 Data size: 2204 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 17 Data size: 5780 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -2422,7 +2422,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: decimal_udf
- Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:key:decimal(20,10), 1:value:int, 2:ROW__ID:struct]
@@ -2434,13 +2434,13 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [3]
selectExpressions: FuncNegateDecimalToDecimal(col 0:decimal(20,10)) -> 3:decimal(20,10)
- Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -2603,14 +2603,14 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: decimal_udf
- Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: ceil(key) (type: decimal(11,0))
outputColumnNames: _col0
- Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -2693,7 +2693,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: decimal_udf
- Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:key:decimal(20,10), 1:value:int, 2:ROW__ID:struct]
@@ -2705,13 +2705,13 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [3]
selectExpressions: FuncFloorDecimalToDecimal(col 0:decimal(20,10)) -> 3:decimal(11,0)
- Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -2809,7 +2809,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: decimal_udf
- Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:key:decimal(20,10), 1:value:int, 2:ROW__ID:struct]
@@ -2821,13 +2821,13 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [3]
selectExpressions: FuncRoundWithNumDigitsDecimalToDecimal(col 0:decimal(20,10), decimalPlaces 2) -> 3:decimal(13,2)
- Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -2925,7 +2925,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: decimal_udf
- Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:key:decimal(20,10), 1:value:int, 2:ROW__ID:struct]
@@ -2937,13 +2937,13 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [3]
selectExpressions: VectorUDFAdaptor(power(key, 2)) -> 3:double
- Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -3041,7 +3041,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: decimal_udf
- Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:key:decimal(20,10), 1:value:int, 2:ROW__ID:struct]
@@ -3053,13 +3053,13 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [5]
selectExpressions: DecimalColModuloDecimalColumn(col 3:decimal(21,10), col 4:decimal(22,12))(children: DecimalColAddDecimalScalar(col 0:decimal(20,10), val 1) -> 3:decimal(21,10), DecimalColDivideDecimalScalar(col 0:decimal(20,10), val 2) -> 4:decimal(22,12)) -> 5:decimal(22,12)
- Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -3160,7 +3160,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: decimal_udf
- Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:key:decimal(20,10), 1:value:int, 2:ROW__ID:struct]
@@ -3171,7 +3171,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1]
- Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: stddev(key), variance(key)
Group By Vectorization:
@@ -3185,7 +3185,7 @@ STAGE PLANS:
keys: value (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 17 Data size: 2788 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
@@ -3196,7 +3196,7 @@ STAGE PLANS:
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
valueColumnNums: [1, 2]
- Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 17 Data size: 2788 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: struct), _col2 (type: struct)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -3244,13 +3244,13 @@ STAGE PLANS:
keys: KEY._col0 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 19 Data size: 2204 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 17 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 19 Data size: 2204 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 17 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -3313,7 +3313,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: decimal_udf
- Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:key:decimal(20,10), 1:value:int, 2:ROW__ID:struct]
@@ -3324,7 +3324,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1]
- Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: stddev_samp(key), var_samp(key)
Group By Vectorization:
@@ -3338,7 +3338,7 @@ STAGE PLANS:
keys: value (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 17 Data size: 2788 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
@@ -3349,7 +3349,7 @@ STAGE PLANS:
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
valueColumnNums: [1, 2]
- Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 17 Data size: 2788 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: struct), _col2 (type: struct)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -3397,13 +3397,13 @@ STAGE PLANS:
keys: KEY._col0 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 19 Data size: 2204 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 17 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 19 Data size: 2204 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 17 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -3466,19 +3466,19 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: decimal_udf
- Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: decimal(20,10))
outputColumnNames: _col0
- Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: histogram_numeric(_col0, 3)
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: array)
Execution mode: llap
LLAP IO: all inputs
@@ -3500,10 +3500,10 @@ STAGE PLANS:
aggregations: histogram_numeric(VALUE._col0)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 832 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 832 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -3550,7 +3550,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: decimal_udf
- Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:key:decimal(20,10), 1:value:int, 2:ROW__ID:struct]
@@ -3561,7 +3561,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0]
- Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: min(key)
Group By Vectorization:
@@ -3573,7 +3573,7 @@ STAGE PLANS:
projectedOutputColumnNums: [0]
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
sort order:
Reduce Sink Vectorization:
@@ -3582,7 +3582,7 @@ STAGE PLANS:
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
valueColumnNums: [0]
- Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: decimal(20,10))
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -3628,13 +3628,13 @@ STAGE PLANS:
projectedOutputColumnNums: [0]
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -3681,7 +3681,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: decimal_udf
- Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:key:decimal(20,10), 1:value:int, 2:ROW__ID:struct]
@@ -3692,7 +3692,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0]
- Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: max(key)
Group By Vectorization:
@@ -3704,7 +3704,7 @@ STAGE PLANS:
projectedOutputColumnNums: [0]
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
sort order:
Reduce Sink Vectorization:
@@ -3713,7 +3713,7 @@ STAGE PLANS:
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
valueColumnNums: [0]
- Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: decimal(20,10))
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -3759,13 +3759,13 @@ STAGE PLANS:
projectedOutputColumnNums: [0]
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -3812,7 +3812,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: decimal_udf
- Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:key:decimal(20,10), 1:value:int, 2:ROW__ID:struct]
@@ -3823,7 +3823,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0]
- Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count(key)
Group By Vectorization:
@@ -3835,7 +3835,7 @@ STAGE PLANS:
projectedOutputColumnNums: [0]
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
sort order:
Reduce Sink Vectorization:
@@ -3844,7 +3844,7 @@ STAGE PLANS:
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
valueColumnNums: [0]
- Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -3890,13 +3890,13 @@ STAGE PLANS:
projectedOutputColumnNums: [0]
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/llap/vector_decimal_udf2.q.out ql/src/test/results/clientpositive/llap/vector_decimal_udf2.q.out
index 011422f029..90f68f5bba 100644
--- ql/src/test/results/clientpositive/llap/vector_decimal_udf2.q.out
+++ ql/src/test/results/clientpositive/llap/vector_decimal_udf2.q.out
@@ -73,7 +73,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: decimal_udf2
- Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 38 Data size: 4032 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:key:decimal(14,5), 1:value:int, 2:ROW__ID:struct]
@@ -83,7 +83,7 @@ STAGE PLANS:
native: true
predicateExpression: FilterDecimalColEqualDecimalScalar(col 0:decimal(14,5), val 10)
predicate: (key = 10) (type: boolean)
- Statistics: Num rows: 5 Data size: 560 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: null (type: double), null (type: double), 1.4711276743037347 (type: double), -0.8390715290764524 (type: double), -0.5440211108893698 (type: double), 0.6483608274590866 (type: double), 0.17453292519943295 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
@@ -92,13 +92,13 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [3, 4, 5, 6, 7, 8, 9]
selectExpressions: ConstantVectorExpression(val null) -> 3:double, ConstantVectorExpression(val null) -> 4:double, ConstantVectorExpression(val 1.4711276743037347) -> 5:double, ConstantVectorExpression(val -0.8390715290764524) -> 6:double, ConstantVectorExpression(val -0.5440211108893698) -> 7:double, ConstantVectorExpression(val 0.6483608274590866) -> 8:double, ConstantVectorExpression(val 0.17453292519943295) -> 9:double
- Statistics: Num rows: 5 Data size: 560 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 5 Data size: 560 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -169,7 +169,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: decimal_udf2
- Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 38 Data size: 4184 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:key:decimal(14,5), 1:value:int, 2:ROW__ID:struct]
@@ -179,7 +179,7 @@ STAGE PLANS:
native: true
predicateExpression: FilterDecimalColEqualDecimalScalar(col 0:decimal(14,5), val 10)
predicate: (key = 10) (type: boolean)
- Statistics: Num rows: 5 Data size: 580 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: 22026.465794806718 (type: double), 2.302585092994046 (type: double), 2.302585092994046 (type: double), 1.0 (type: double), log(10, value) (type: double), log(value, 10) (type: double), 1.0 (type: double), 3.1622776601683795 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
@@ -188,13 +188,13 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [3, 4, 5, 6, 7, 8, 9, 10]
selectExpressions: ConstantVectorExpression(val 22026.465794806718) -> 3:double, ConstantVectorExpression(val 2.302585092994046) -> 4:double, ConstantVectorExpression(val 2.302585092994046) -> 5:double, ConstantVectorExpression(val 1.0) -> 6:double, FuncLogWithBaseLongToDouble(col 1:double) -> 7:double, VectorUDFAdaptor(log(value, 10)) -> 8:double, ConstantVectorExpression(val 1.0) -> 9:double, ConstantVectorExpression(val 3.1622776601683795) -> 10:double
- Statistics: Num rows: 5 Data size: 580 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 5 Data size: 580 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/llap/vector_groupby4.q.out ql/src/test/results/clientpositive/llap/vector_groupby4.q.out
index ce893561a7..d0bba2e949 100644
--- ql/src/test/results/clientpositive/llap/vector_groupby4.q.out
+++ ql/src/test/results/clientpositive/llap/vector_groupby4.q.out
@@ -43,6 +43,8 @@ STAGE PLANS:
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
+ Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -143,6 +145,60 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.dest1
+ Select Operator
+ expressions: _col0 (type: string)
+ outputColumnNames: c1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0]
+ Statistics: Num rows: 250 Data size: 43792 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Map-reduce partition columns: rand() (type: double)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 250 Data size: 43792 Basic stats: COMPLETE Column stats: NONE
+ value expressions: c1 (type: string)
+ Reducer 4
+ Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported
+ vectorized: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0, 'hll')
+ mode: partial1
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct)
+ Reducer 5
+ Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported
+ vectorized: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0)
+ mode: final
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-2
Dependency Collection
@@ -160,6 +216,10 @@ STAGE PLANS:
Stage: Stage-3
Stats Work
Basic Stats Work:
+ Column Stats Desc:
+ Columns: c1
+ Column Types: string
+ Table: default.dest1
PREHOOK: query: FROM srcorc
INSERT OVERWRITE TABLE dest1 SELECT substr(srcorc.key,1,1) GROUP BY substr(srcorc.key,1,1)
diff --git ql/src/test/results/clientpositive/llap/vector_groupby6.q.out ql/src/test/results/clientpositive/llap/vector_groupby6.q.out
index cb98fc46e4..4c68fd0775 100644
--- ql/src/test/results/clientpositive/llap/vector_groupby6.q.out
+++ ql/src/test/results/clientpositive/llap/vector_groupby6.q.out
@@ -43,6 +43,8 @@ STAGE PLANS:
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
+ Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -143,6 +145,60 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.dest1
+ Select Operator
+ expressions: _col0 (type: string)
+ outputColumnNames: c1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0]
+ Statistics: Num rows: 250 Data size: 43792 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Map-reduce partition columns: rand() (type: double)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 250 Data size: 43792 Basic stats: COMPLETE Column stats: NONE
+ value expressions: c1 (type: string)
+ Reducer 4
+ Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported
+ vectorized: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0, 'hll')
+ mode: partial1
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct)
+ Reducer 5
+ Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported
+ vectorized: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0)
+ mode: final
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-2
Dependency Collection
@@ -160,6 +216,10 @@ STAGE PLANS:
Stage: Stage-3
Stats Work
Basic Stats Work:
+ Column Stats Desc:
+ Columns: c1
+ Column Types: string
+ Table: default.dest1
PREHOOK: query: FROM srcorc
INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(srcorc.value,5,1)
diff --git ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out
index a17f45754b..39de888591 100644
--- ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out
+++ ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out
@@ -632,8 +632,10 @@ STAGE PLANS:
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
- Reducer 4 <- Map 1 (SIMPLE_EDGE)
- Reducer 5 <- Reducer 4 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
+ Reducer 5 <- Map 1 (SIMPLE_EDGE)
+ Reducer 6 <- Reducer 5 (SIMPLE_EDGE)
+ Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -712,10 +714,38 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.t2
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int)
+ outputColumnNames: key1, key2, val
+ Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(val, 'hll')
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1680 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 1680 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
Reducer 4
Execution mode: llap
Reduce Operator Tree:
Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+ mode: final
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1696 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1696 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 5
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
aggregations: sum(VALUE._col0)
keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int)
mode: partials
@@ -727,7 +757,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
value expressions: _col3 (type: bigint)
- Reducer 5
+ Reducer 6
Execution mode: llap
Reduce Operator Tree:
Group By Operator
@@ -749,6 +779,34 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.t3
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int)
+ outputColumnNames: key1, key2, val
+ Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(val, 'hll')
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1680 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 1680 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+ Reducer 7
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+ mode: final
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1696 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1696 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-3
Dependency Collection
@@ -766,6 +824,10 @@ STAGE PLANS:
Stage: Stage-4
Stats Work
Basic Stats Work:
+ Column Stats Desc:
+ Columns: key1, key2, val
+ Column Types: string, string, int
+ Table: default.t2
Stage: Stage-1
Move Operator
@@ -780,6 +842,10 @@ STAGE PLANS:
Stage: Stage-5
Stats Work
Basic Stats Work:
+ Column Stats Desc:
+ Columns: key1, key2, val
+ Column Types: string, string, int
+ Table: default.t3
PREHOOK: query: FROM T1
INSERT OVERWRITE TABLE T2 SELECT key, val, count(1) group by key, val with cube
diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets2.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets2.q.out
index 3a372819ca..1877bba0c6 100644
--- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets2.q.out
+++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets2.q.out
@@ -640,7 +640,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: t2
- Statistics: Num rows: 6 Data size: 2256 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 1068 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:a:string, 1:b:string, 2:c:int, 3:d:int, 4:ROW__ID:struct]
@@ -652,7 +652,7 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [0, 1, 5]
selectExpressions: LongColAddLongColumn(col 2:int, col 3:int) -> 5:int
- Statistics: Num rows: 6 Data size: 2256 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 1068 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: sum(_col2)
Group By Vectorization:
@@ -666,7 +666,7 @@ STAGE PLANS:
keys: _col0 (type: string), _col1 (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 6 Data size: 2256 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
sort order: ++
@@ -677,7 +677,7 @@ STAGE PLANS:
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
valueColumnNums: [2]
- Statistics: Num rows: 6 Data size: 2256 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -725,7 +725,7 @@ STAGE PLANS:
keys: KEY._col0 (type: string), KEY._col1 (type: string), 0 (type: int)
mode: partials
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 24 Data size: 9024 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12 Data size: 2184 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int)
sort order: +++
@@ -736,7 +736,7 @@ STAGE PLANS:
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
valueColumnNums: [3]
- Statistics: Num rows: 24 Data size: 9024 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12 Data size: 2184 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col3 (type: bigint)
Reducer 3
Execution mode: vectorized, llap
@@ -767,7 +767,7 @@ STAGE PLANS:
keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int)
mode: final
outputColumnNames: _col0, _col1, _col3
- Statistics: Num rows: 12 Data size: 4512 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12 Data size: 2184 Basic stats: COMPLETE Column stats: COMPLETE
pruneGroupingSetId: true
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint)
@@ -776,13 +776,13 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 2]
- Statistics: Num rows: 12 Data size: 4512 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 12 Data size: 4512 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out
index 61457fe2a2..4de6ebbff2 100644
--- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out
+++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out
@@ -54,7 +54,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: t
- Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:category:int, 1:live:int, 2:comments:int, 3:ROW__ID:struct]
@@ -65,7 +65,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 2]
- Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: max(live), max(comments)
Group By Vectorization:
@@ -79,7 +79,7 @@ STAGE PLANS:
keys: category (type: int), 0 (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: int)
sort order: ++
@@ -90,7 +90,7 @@ STAGE PLANS:
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
valueColumnNums: [2, 3]
- Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: int), _col3 (type: int)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -138,7 +138,7 @@ STAGE PLANS:
keys: KEY._col0 (type: int), KEY._col1 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col2, _col3
- Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
pruneGroupingSetId: true
Filter Operator
Filter Vectorization:
native: true
predicateExpression: FilterLongColGreaterLongScalar(col 2:int, val 0)
predicate: (_col3 > 0) (type: boolean)
- Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int), _col3 (type: int)
sort order: ++
@@ -158,7 +158,7 @@ STAGE PLANS:
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
partitionColumnNums: [0]
valueColumnNums: [1]
- Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: int)
Reducer 3
Execution mode: vectorized, llap
@@ -183,7 +183,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 2, 1]
- Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
@@ -217,7 +217,7 @@ STAGE PLANS:
outputTypes: [int, int, int, int]
partitionExpressions: [col 0:int]
streamingColumns: [3]
- Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), rank_window_0 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
@@ -225,13 +225,13 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 2, 1, 3]
- Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out
index e594b87c9c..0da9a4560a 100644
--- ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out
+++ ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out
@@ -255,7 +255,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: store_sales
- Statistics: Num rows: 1000 Data size: 3804 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
Select Operator
@@ -265,7 +265,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [9]
- Statistics: Num rows: 1000 Data size: 3804 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
Group By Vectorization:
className: VectorGroupByOperator
@@ -277,7 +277,7 @@ STAGE PLANS:
keys: ss_ticket_number (type: int)
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 1000 Data size: 3804 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 82 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
@@ -286,7 +286,7 @@ STAGE PLANS:
className: VectorReduceSinkLongOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 1000 Data size: 3804 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 82 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.1
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -319,7 +319,7 @@ STAGE PLANS:
keys: KEY._col0 (type: int)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 1902 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 82 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
@@ -327,7 +327,7 @@ STAGE PLANS:
className: VectorReduceSinkObjectHashOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 500 Data size: 1902 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 82 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.1
Reducer 3
Execution mode: vectorized, llap
@@ -345,19 +345,19 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0]
- Statistics: Num rows: 500 Data size: 1902 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 82 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 20
Limit Vectorization:
className: VectorLimitOperator
native: true
- Statistics: Num rows: 20 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 20 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -454,7 +454,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: store_sales
- Statistics: Num rows: 1000 Data size: 3804 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
Select Operator
@@ -464,7 +464,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [9]
- Statistics: Num rows: 1000 Data size: 3804 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
Group By Vectorization:
className: VectorGroupByOperator
@@ -476,7 +476,7 @@ STAGE PLANS:
keys: ss_ticket_number (type: int)
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 1000 Data size: 3804 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 82 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
@@ -485,7 +485,7 @@ STAGE PLANS:
className: VectorReduceSinkLongOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 1000 Data size: 3804 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 82 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
@@ -517,7 +517,7 @@ STAGE PLANS:
keys: KEY._col0 (type: int)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 1902 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 82 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: min(_col0)
Group By Vectorization:
@@ -531,7 +531,7 @@ STAGE PLANS:
keys: _col0 (type: int)
mode: complete
outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 951 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 82 Data size: 656 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col1 (type: int)
outputColumnNames: _col0
@@ -539,7 +539,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [1]
- Statistics: Num rows: 250 Data size: 951 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 82 Data size: 656 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
@@ -547,7 +547,7 @@ STAGE PLANS:
className: VectorReduceSinkObjectHashOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 250 Data size: 951 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 82 Data size: 656 Basic stats: COMPLETE Column stats: COMPLETE
Reducer 3
Execution mode: vectorized, llap
Reduce Vectorization:
@@ -564,13 +564,13 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0]
- Statistics: Num rows: 250 Data size: 951 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 82 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 250 Data size: 951 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 82 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -737,7 +737,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: store_sales
- Statistics: Num rows: 1000 Data size: 125532 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1000 Data size: 126616 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
Filter Operator
@@ -746,7 +746,7 @@ STAGE PLANS:
native: true
predicateExpression: FilterLongColEqualLongScalar(col 9:int, val 1)
predicate: (ss_ticket_number = 1) (type: boolean)
- Statistics: Num rows: 5 Data size: 627 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12 Data size: 1584 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: ss_item_sk (type: int), ss_quantity (type: int), ss_wholesale_cost_decimal (type: decimal(38,18)), ss_net_profit (type: double)
outputColumnNames: ss_item_sk, ss_quantity, ss_wholesale_cost_decimal, ss_net_profit
@@ -754,7 +754,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [2, 10, 12, 23]
- Statistics: Num rows: 5 Data size: 627 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12 Data size: 1584 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: min(ss_quantity), max(ss_net_profit), max(ss_wholesale_cost_decimal)
Group By Vectorization:
@@ -768,7 +768,7 @@ STAGE PLANS:
keys: ss_item_sk (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 5 Data size: 627 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 768 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
@@ -777,7 +777,7 @@ STAGE PLANS:
className: VectorReduceSinkLongOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 5 Data size: 627 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 768 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: int), _col2 (type: double), _col3 (type: decimal(38,18))
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -812,7 +812,7 @@ STAGE PLANS:
keys: KEY._col0 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 2 Data size: 250 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 768 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: int), _col1 (type: int), _col2 (type: double), _col3 (type: decimal(38,18))
outputColumnNames: _col1, _col2, _col3, _col4
@@ -820,7 +820,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 2, 3]
- Statistics: Num rows: 2 Data size: 250 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 768 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: sum(_col1), sum(_col2), avg(_col2), sum(_col3), avg(_col3), sum(_col4), avg(_col4)
Group By Vectorization:
@@ -834,7 +834,7 @@ STAGE PLANS:
keys: 1 (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
- Statistics: Num rows: 2 Data size: 250 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
@@ -843,7 +843,7 @@ STAGE PLANS:
className: VectorReduceSinkLongOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 2 Data size: 250 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: struct), _col4 (type: double), _col5 (type: struct), _col6 (type: decimal(38,18)), _col7 (type: struct)
Reducer 3
Execution mode: vectorized, llap
@@ -867,7 +867,7 @@ STAGE PLANS:
keys: KEY._col0 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
- Statistics: Num rows: 1 Data size: 125 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 268 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: 1 (type: int), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: decimal(38,18)), _col7 (type: decimal(38,18))
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
@@ -876,13 +876,13 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [8, 1, 2, 3, 4, 5, 6, 7]
selectExpressions: ConstantVectorExpression(val 1) -> 8:int
- Statistics: Num rows: 1 Data size: 125 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 268 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 1 Data size: 125 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 268 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -968,7 +968,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: store_sales
- Statistics: Num rows: 1000 Data size: 125532 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1000 Data size: 126616 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
Select Operator
@@ -978,7 +978,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [2, 9, 10, 12, 23]
- Statistics: Num rows: 1000 Data size: 125532 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1000 Data size: 126616 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: min(ss_quantity), max(ss_net_profit), max(ss_wholesale_cost_decimal)
Group By Vectorization:
@@ -992,7 +992,7 @@ STAGE PLANS:
keys: ss_ticket_number (type: int), ss_item_sk (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 1000 Data size: 125532 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 66000 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: int)
sort order: ++
@@ -1001,7 +1001,7 @@ STAGE PLANS:
className: VectorReduceSinkMultiKeyOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark]
IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1000 Data size: 125532 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 66000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: int), _col3 (type: double), _col4 (type: decimal(38,18)) Execution mode: vectorized, llap LLAP IO: all inputs @@ -1036,7 +1036,7 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 500 Data size: 62766 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 66000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int), _col0 (type: int), _col2 (type: int), _col3 (type: double), _col4 (type: decimal(38,18)) outputColumnNames: _col0, _col1, _col2, _col3, _col4 @@ -1044,7 +1044,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [1, 0, 2, 3, 4] - Statistics: Num rows: 500 Data size: 62766 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 66000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col2), avg(_col2), sum(_col3), avg(_col3), sum(_col4), avg(_col4) Group By Vectorization: @@ -1058,7 +1058,7 @@ STAGE PLANS: keys: _col1 (type: int), _col0 (type: int) mode: complete outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 250 Data size: 31383 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 290000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: decimal(38,18)), _col7 (type: decimal(38,18)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 @@ -1066,7 +1066,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7] - Statistics: Num rows: 250 Data size: 31383 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 290000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ @@ -1074,7 +1074,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 250 Data size: 31383 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 290000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: decimal(38,18)), _col7 (type: decimal(38,18)) Reducer 3 Execution mode: vectorized, llap @@ -1092,13 +1092,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7] - Statistics: Num rows: 250 Data size: 31383 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 290000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: 
VectorFileSinkOperator native: false - Statistics: Num rows: 250 Data size: 31383 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 290000 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/vector_inner_join.q.out ql/src/test/results/clientpositive/llap/vector_inner_join.q.out index 435d3088e6..438ffd9f5d 100644 --- ql/src/test/results/clientpositive/llap/vector_inner_join.q.out +++ ql/src/test/results/clientpositive/llap/vector_inner_join.q.out @@ -54,7 +54,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:c:int, 1:ROW__ID:struct] @@ -64,7 +64,7 @@ STAGE PLANS: native: true predicateExpression: FilterLongColGreaterLongScalar(col 0:int, val 2) predicate: (c > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: c (type: int) outputColumnNames: _col0 @@ -72,7 +72,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -89,7 +89,7 @@ STAGE PLANS: outputColumnNames: _col1 input vertices: 1 Map 2 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int) outputColumnNames: _col0 @@ -97,13 +97,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -129,7 +129,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:a:int, 1:ROW__ID:struct] @@ -139,7 +139,7 @@ STAGE PLANS: native: true predicateExpression: FilterLongColGreaterLongScalar(col 0:int, val 2) predicate: (a > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: a (type: int) outputColumnNames: _col0 @@ -147,7 +147,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num 
rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -158,7 +158,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [] - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -220,7 +220,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:c:int, 1:ROW__ID:struct] @@ -230,7 +230,7 @@ STAGE PLANS: native: true predicateExpression: FilterLongColGreaterLongScalar(col 0:int, val 2) predicate: (c > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: c (type: int) outputColumnNames: _col0 @@ -238,7 +238,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Semi Join 0 to 1 @@ -256,13 +256,13 @@ STAGE PLANS: outputColumnNames: _col0 input vertices: 1 Map 2 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -288,7 +288,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:a:int, 1:ROW__ID:struct] @@ -298,7 +298,7 @@ STAGE PLANS: native: true predicateExpression: FilterLongColGreaterLongScalar(col 0:int, val 2) predicate: (a > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: a (type: int) outputColumnNames: _col0 @@ -306,7 +306,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: className: VectorGroupByOperator @@ 
-318,7 +318,7 @@ STAGE PLANS: keys: _col0 (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -329,7 +329,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [] - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -423,7 +423,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:c:int, 1:v2:string, 2:ROW__ID:struct] @@ -433,7 +433,7 @@ STAGE PLANS: native: true predicateExpression: FilterLongColGreaterLongScalar(col 0:int, val 2) predicate: (c > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: c (type: int) outputColumnNames: _col0 @@ -441,7 +441,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -459,7 +459,7 @@ STAGE PLANS: outputColumnNames: _col1, _col2 input vertices: 1 Map 2 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string), _col2 (type: int) outputColumnNames: _col0, _col1 @@ -467,13 +467,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [3, 0] - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -499,7 +499,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 4 Data size: 752 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:v1:string, 1:a:int, 2:ROW__ID:struct] @@ -509,7 +509,7 @@ STAGE PLANS: native: true predicateExpression: FilterLongColGreaterLongScalar(col 1:int, val 2) predicate: (a > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column 
stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: v1 (type: string), a (type: int) outputColumnNames: _col0, _col1 @@ -517,7 +517,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + @@ -528,7 +528,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [0] - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs @@ -591,7 +591,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 4 Data size: 752 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:v1:string, 1:a:int, 2:ROW__ID:struct] @@ -601,7 +601,7 @@ STAGE PLANS: native: true predicateExpression: FilterLongColGreaterLongScalar(col 1:int, val 2) predicate: (a > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: v1 (type: string), a (type: int) outputColumnNames: _col0, _col1 @@ -609,7 +609,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + @@ -620,7 +620,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [0] - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs @@ -643,7 +643,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 5 Data size: 940 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 460 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:c:int, 1:v2:string, 2:ROW__ID:struct] @@ -653,7 +653,7 @@ STAGE PLANS: native: true predicateExpression: FilterLongColGreaterLongScalar(col 0:int, val 2) predicate: (c > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: c (type: int), v2 (type: string) 
 outputColumnNames: _col0, _col1
@@ -661,7 +661,7 @@ STAGE PLANS:
 className: VectorSelectOperator
 native: true
 projectedOutputColumnNums: [0, 1]
-Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
 Map Join Operator
 condition map:
 Inner Join 0 to 1
@@ -680,13 +680,13 @@ STAGE PLANS:
 outputColumnNames: _col0, _col1, _col2, _col3
 input vertices:
 0 Map 1
-Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
 File Sink Vectorization:
 className: VectorFileSinkOperator
 native: false
-Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -752,7 +752,7 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: t2
-Statistics: Num rows: 5 Data size: 940 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 5 Data size: 460 Basic stats: COMPLETE Column stats: COMPLETE
 TableScan Vectorization:
 native: true
 vectorizationSchemaColumns: [0:c:int, 1:v2:string, 2:ROW__ID:struct]
@@ -762,7 +762,7 @@ STAGE PLANS:
 native: true
 predicateExpression: FilterLongColGreaterLongScalar(col 0:int, val 2)
 predicate: (c > 2) (type: boolean)
-Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: c (type: int), v2 (type: string)
 outputColumnNames: _col0, _col1
@@ -770,7 +770,7 @@ STAGE PLANS:
 className: VectorSelectOperator
 native: true
 projectedOutputColumnNums: [0, 1]
-Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
 Map Join Operator
 condition map:
 Inner Join 0 to 1
@@ -789,7 +789,7 @@ STAGE PLANS:
 outputColumnNames: _col0, _col1, _col2, _col3
 input vertices:
 1 Map 2
-Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col2 (type: string), (_col3 * 2) (type: int), (_col0 * 5) (type: int), _col1 (type: string)
 outputColumnNames: _col0, _col1, _col2, _col3
@@ -798,13 +798,13 @@ STAGE PLANS:
 native: true
 projectedOutputColumnNums: [3, 4, 5, 1]
 selectExpressions: LongColMultiplyLongScalar(col 0:int, val 2) -> 4:int, LongColMultiplyLongScalar(col 0:int, val 5) -> 5:int
-Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
 File Sink Vectorization:
 className: VectorFileSinkOperator
 native: false
-Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -830,7 +830,7 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: t1
-Statistics: Num rows: 4 Data size: 752 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 4 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE
 TableScan Vectorization:
 native: true
 vectorizationSchemaColumns: [0:v1:string, 1:a:int, 2:ROW__ID:struct]
@@ -840,7 +840,7 @@ STAGE PLANS:
 native: true
 predicateExpression: FilterLongColGreaterLongScalar(col 1:int, val 2)
 predicate: (a > 2) (type: boolean)
-Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: v1 (type: string), a (type: int)
 outputColumnNames: _col0, _col1
@@ -848,7 +848,7 @@ STAGE PLANS:
 className: VectorSelectOperator
 native: true
 projectedOutputColumnNums: [0, 1]
-Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col1 (type: int)
 sort order: +
@@ -859,7 +859,7 @@ STAGE PLANS:
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
 valueColumnNums: [0]
-Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: string)
 Execution mode: vectorized, llap
 LLAP IO: all inputs
@@ -922,7 +922,7 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: t2
-Statistics: Num rows: 5 Data size: 940 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 5 Data size: 460 Basic stats: COMPLETE Column stats: COMPLETE
 TableScan Vectorization:
 native: true
 vectorizationSchemaColumns: [0:c:int, 1:v2:string, 2:ROW__ID:struct]
@@ -932,7 +932,7 @@ STAGE PLANS:
 native: true
 predicateExpression: FilterLongColGreaterLongScalar(col 0:int, val 2)
 predicate: (c > 2) (type: boolean)
-Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: c (type: int), v2 (type: string)
 outputColumnNames: _col0, _col1
@@ -940,7 +940,7 @@ STAGE PLANS:
 className: VectorSelectOperator
 native: true
 projectedOutputColumnNums: [0, 1]
-Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
 Map Join Operator
 condition map:
 Inner Join 0 to 1
@@ -959,7 +959,7 @@ STAGE PLANS:
 outputColumnNames: _col0, _col1, _col2
 input vertices:
 1 Map 2
-Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col2 (type: string), _col1 (type: string), _col0 (type: int)
 outputColumnNames: _col0, _col1, _col2
@@ -967,13 +967,13 @@ STAGE PLANS:
 className: VectorSelectOperator
 native: true
 projectedOutputColumnNums: [3, 1, 0]
-Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
 File Sink Vectorization:
 className: VectorFileSinkOperator
 native: false
-Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -999,7 +999,7 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: t1
-Statistics: Num rows: 4 Data size: 752 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 4 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE
 TableScan Vectorization:
 native: true
 vectorizationSchemaColumns: [0:v1:string, 1:a:int, 2:ROW__ID:struct]
@@ -1009,7 +1009,7 @@ STAGE PLANS:
 native: true
 predicateExpression: FilterLongColGreaterLongScalar(col 1:int, val 2)
 predicate: (a > 2) (type: boolean)
-Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: v1 (type: string), a (type: int)
 outputColumnNames: _col0, _col1
@@ -1017,7 +1017,7 @@ STAGE PLANS:
 className: VectorSelectOperator
 native: true
 projectedOutputColumnNums: [0, 1]
-Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col1 (type: int)
 sort order: +
@@ -1028,7 +1028,7 @@ STAGE PLANS:
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
 valueColumnNums: [0]
-Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: string)
 Execution mode: vectorized, llap
 LLAP IO: all inputs
@@ -1091,7 +1091,7 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: t2
-Statistics: Num rows: 5 Data size: 940 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 5 Data size: 460 Basic stats: COMPLETE Column stats: COMPLETE
 TableScan Vectorization:
 native: true
 vectorizationSchemaColumns: [0:c:int, 1:v2:string, 2:ROW__ID:struct]
@@ -1101,7 +1101,7 @@ STAGE PLANS:
 native: true
 predicateExpression: FilterLongColGreaterLongScalar(col 0:int, val 2)
 predicate: (c > 2) (type: boolean)
-Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: c (type: int), v2 (type: string)
 outputColumnNames: _col0, _col1
@@ -1109,7 +1109,7 @@ STAGE PLANS:
 className: VectorSelectOperator
 native: true
 projectedOutputColumnNums: [0, 1]
-Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
 Map Join Operator
 condition map:
 Inner Join 0 to 1
@@ -1128,7 +1128,7 @@ STAGE PLANS:
 outputColumnNames: _col1, _col2, _col3
 input vertices:
 1 Map 2
-Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col3 (type: int), _col2 (type: string), _col1 (type: string)
 outputColumnNames: _col0, _col1, _col2
@@ -1136,13 +1136,13 @@ STAGE PLANS:
 className: VectorSelectOperator
 native: true
 projectedOutputColumnNums: [0, 3, 1]
-Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
 File Sink Vectorization:
 className: VectorFileSinkOperator
 native: false
-Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1168,7 +1168,7 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: t1
-Statistics: Num rows: 4 Data size: 752 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 4 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE
 TableScan Vectorization:
 native: true
 vectorizationSchemaColumns: [0:v1:string, 1:a:int, 2:ROW__ID:struct]
@@ -1178,7 +1178,7 @@ STAGE PLANS:
 native: true
 predicateExpression: FilterLongColGreaterLongScalar(col 1:int, val 2)
 predicate: (a > 2) (type: boolean)
-Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: v1 (type: string), a (type: int)
 outputColumnNames: _col0, _col1
@@ -1186,7 +1186,7 @@ STAGE PLANS:
 className: VectorSelectOperator
 native: true
 projectedOutputColumnNums: [0, 1]
-Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col1 (type: int)
 sort order: +
@@ -1197,7 +1197,7 @@ STAGE PLANS:
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
 valueColumnNums: [0]
-Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: string)
 Execution mode: vectorized, llap
 LLAP IO: all inputs
@@ -1260,7 +1260,7 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: t1
-Statistics: Num rows: 4 Data size: 752 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 4 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE
 TableScan Vectorization:
 native: true
 vectorizationSchemaColumns: [0:v1:string, 1:a:int, 2:ROW__ID:struct]
@@ -1270,7 +1270,7 @@ STAGE PLANS:
 native: true
 predicateExpression: FilterLongColGreaterLongScalar(col 1:int, val 2)
 predicate: (a > 2) (type: boolean)
-Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: v1 (type: string), a (type: int)
 outputColumnNames: _col0, _col1
@@ -1278,7 +1278,7 @@ STAGE PLANS:
 className: VectorSelectOperator
 native: true
 projectedOutputColumnNums: [0, 1]
-Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col1 (type: int)
 sort order: +
@@ -1289,7 +1289,7 @@ STAGE PLANS:
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
 valueColumnNums: [0]
-Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: string)
 Execution mode: vectorized, llap
 LLAP IO: all inputs
@@ -1312,7 +1312,7 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: t2
-Statistics: Num rows: 5 Data size: 940 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 5 Data size: 460 Basic stats: COMPLETE Column stats: COMPLETE
 TableScan Vectorization:
 native: true
 vectorizationSchemaColumns: [0:c:int, 1:v2:string, 2:ROW__ID:struct]
@@ -1322,7 +1322,7 @@ STAGE PLANS:
 native: true
 predicateExpression: FilterLongColGreaterLongScalar(col 0:int, val 2)
 predicate: (c > 2) (type: boolean)
-Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: c (type: int), v2 (type: string)
 outputColumnNames: _col0, _col1
@@ -1330,7 +1330,7 @@ STAGE PLANS:
 className: VectorSelectOperator
 native: true
 projectedOutputColumnNums: [0, 1]
-Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
 Map Join Operator
 condition map:
 Inner Join 0 to 1
@@ -1349,7 +1349,7 @@ STAGE PLANS:
 outputColumnNames: _col0, _col2, _col3
 input vertices:
 0 Map 1
-Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col0 (type: string), _col3 (type: string), _col2 (type: int)
 outputColumnNames: _col0, _col1, _col2
@@ -1357,13 +1357,13 @@ STAGE PLANS:
 className: VectorSelectOperator
 native: true
 projectedOutputColumnNums: [3, 1, 0]
-Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
 File Sink Vectorization:
 className: VectorFileSinkOperator
 native: false
-Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1429,7 +1429,7 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: t1
-Statistics: Num rows: 4 Data size: 752 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 4 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE
 TableScan Vectorization:
 native: true
 vectorizationSchemaColumns: [0:v1:string, 1:a:int, 2:ROW__ID:struct]
@@ -1439,7 +1439,7 @@ STAGE PLANS:
 native: true
 predicateExpression: FilterLongColGreaterLongScalar(col 1:int, val 2)
 predicate: (a > 2) (type: boolean)
-Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: v1 (type: string), a (type: int)
 outputColumnNames: _col0, _col1
@@ -1447,7 +1447,7 @@ STAGE PLANS:
 className: VectorSelectOperator
 native: true
 projectedOutputColumnNums: [0, 1]
-Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col1 (type: int)
 sort order: +
@@ -1458,7 +1458,7 @@ STAGE PLANS:
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
 valueColumnNums: [0]
-Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: string)
 Execution mode: vectorized, llap
 LLAP IO: all inputs
@@ -1481,7 +1481,7 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: t2
-Statistics: Num rows: 5 Data size: 940 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 5 Data size: 460 Basic stats: COMPLETE Column stats: COMPLETE
 TableScan Vectorization:
 native: true
 vectorizationSchemaColumns: [0:c:int, 1:v2:string, 2:ROW__ID:struct]
@@ -1491,7 +1491,7 @@ STAGE PLANS:
 native: true
 predicateExpression: FilterLongColGreaterLongScalar(col 0:int, val 2)
 predicate: (c > 2) (type: boolean)
-Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: c (type: int), v2 (type: string)
 outputColumnNames: _col0, _col1
@@ -1499,7 +1499,7 @@ STAGE PLANS:
 className: VectorSelectOperator
 native: true
 projectedOutputColumnNums: [0, 1]
-Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
 Map Join Operator
 condition map:
 Inner Join 0 to 1
@@ -1518,7 +1518,7 @@ STAGE PLANS:
 outputColumnNames: _col0, _col1, _col3
 input vertices:
 0 Map 1
-Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col1 (type: int), _col0 (type: string), _col3 (type: string)
 outputColumnNames: _col0, _col1, _col2
@@ -1526,13 +1526,13 @@ STAGE PLANS:
 className: VectorSelectOperator
 native: true
 projectedOutputColumnNums: [0, 3, 1]
-Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
 File Sink Vectorization:
 className: VectorFileSinkOperator
 native: false
-Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/llap/vector_left_outer_join2.q.out ql/src/test/results/clientpositive/llap/vector_left_outer_join2.q.out
index 7f251627db..3cc72fcc69 100644
--- ql/src/test/results/clientpositive/llap/vector_left_outer_join2.q.out
+++ ql/src/test/results/clientpositive/llap/vector_left_outer_join2.q.out
@@ -102,11 +102,11 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: tjoin1
-Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: rnum (type: int), c1 (type: int), c2 (type: int)
 outputColumnNames: _col0, _col1, _col2
-Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE
 Map Join Operator
 condition map:
 Left Outer Join 0 to 1
@@ -119,14 +119,14 @@ STAGE PLANS:
 outputColumnNames: _col0, _col1, _col2, _col4
 input vertices:
 1 Map 2
-Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col4 (type: char(2))
 outputColumnNames: _col0, _col1, _col2, _col3
-Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
-Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -137,16 +137,16 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: tjoin2
-Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: c1 (type: int), c2 (type: char(2))
 outputColumnNames: _col0, _col1
-Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: int)
 sort order: +
 Map-reduce partition columns: _col0 (type: int)
-Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col1 (type: char(2))
 Execution mode: llap
 LLAP IO: all inputs
@@ -196,11 +196,11 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: tjoin1
-Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: rnum (type: int), c1 (type: int), c2 (type: int)
 outputColumnNames: _col0, _col1, _col2
-Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE
 Map Join Operator
 condition map:
 Left Outer Join 0 to 1
@@ -213,14 +213,14 @@ STAGE PLANS:
 outputColumnNames: _col0, _col1, _col2, _col4
 input vertices:
 1 Map 2
-Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col4 (type: char(2))
 outputColumnNames: _col0, _col1, _col2, _col3
-Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
-Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -231,16 +231,16 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: tjoin2
-Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: c1 (type: int), c2 (type: char(2))
 outputColumnNames: _col0, _col1
-Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: int)
 sort order: +
 Map-reduce partition columns: _col0 (type: int)
-Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col1 (type: char(2))
 Execution mode: llap
 LLAP IO: all inputs
@@ -290,7 +290,7 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: tjoin1
-Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE
 TableScan Vectorization:
 native: true
 Select Operator
@@ -300,7 +300,7 @@ STAGE PLANS:
 className: VectorSelectOperator
 native: true
 projectedOutputColumnNums: [0, 1, 2]
-Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE
 Map Join Operator
 condition map:
 Left Outer Join 0 to 1
@@ -318,7 +318,7 @@ STAGE PLANS:
 outputColumnNames: _col0, _col1, _col2, _col4
 input vertices:
 1 Map 2
-Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col4 (type: char(2))
 outputColumnNames: _col0, _col1, _col2, _col3
@@ -326,13 +326,13 @@ STAGE PLANS:
 className: VectorSelectOperator
 native: true
 projectedOutputColumnNums: [0, 1, 2, 3]
-Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
 File Sink Vectorization:
 className: VectorFileSinkOperator
 native: false
-Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -352,7 +352,7 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: tjoin2
-Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE
 TableScan Vectorization:
 native: true
 Select Operator
@@ -362,7 +362,7 @@ STAGE PLANS:
 className: VectorSelectOperator
 native: true
 projectedOutputColumnNums: [1, 2]
-Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: int)
 sort order: +
@@ -371,7 +371,7 @@ STAGE PLANS:
 className: VectorReduceSinkLongOperator
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col1 (type: char(2))
 Execution mode: vectorized, llap
 LLAP IO: all inputs
@@ -430,7 +430,7 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: tjoin1
-Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE
 TableScan Vectorization:
 native: true
 Select Operator
@@ -440,7 +440,7 @@ STAGE PLANS:
 className: VectorSelectOperator
 native: true
 projectedOutputColumnNums: [0, 1, 2]
-Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE
 Map Join Operator
 condition map:
 Left Outer Join 0 to 1
@@ -458,7 +458,7 @@ STAGE PLANS:
 outputColumnNames: _col0, _col1, _col2, _col4
 input vertices:
 1 Map 2
-Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col4 (type: char(2))
 outputColumnNames: _col0, _col1, _col2, _col3
@@ -466,13 +466,13 @@ STAGE PLANS:
 className: VectorSelectOperator
 native: true
 projectedOutputColumnNums: [0, 1, 2, 3]
-Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
 File Sink Vectorization:
 className: VectorFileSinkOperator
 native: false
-Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -492,7 +492,7 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: tjoin2
-Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE
 TableScan Vectorization:
 native: true
 Select Operator
@@ -502,7 +502,7 @@ STAGE PLANS:
 className: VectorSelectOperator
 native: true
 projectedOutputColumnNums: [1, 2]
-Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: int)
 sort order: +
@@ -511,7 +511,7 @@ STAGE PLANS:
 className: VectorReduceSinkLongOperator
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col1 (type: char(2))
 Execution mode: vectorized, llap
 LLAP IO: all inputs
@@ -570,7 +570,7 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: tjoin1
-Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE
 TableScan Vectorization:
 native: true
 Select Operator
@@ -580,7 +580,7 @@ STAGE PLANS:
 className: VectorSelectOperator
 native: true
 projectedOutputColumnNums: [0, 1, 2]
-Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE
 Map Join Operator
 condition map:
 Left Outer Join 0 to 1
@@ -597,7 +597,7 @@ STAGE PLANS:
 outputColumnNames: _col0, _col1, _col2, _col4
 input vertices:
 1 Map 2
-Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col4 (type: char(2))
 outputColumnNames: _col0, _col1, _col2, _col3
@@ -605,13 +605,13 @@ STAGE PLANS:
 className: VectorSelectOperator
 native: true
 projectedOutputColumnNums: [0, 1, 2, 4]
-Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
 File Sink Vectorization:
 className: VectorFileSinkOperator
 native: false
-Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -631,7 +631,7 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: tjoin2
-Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE
 TableScan Vectorization:
 native: true
 Select Operator
@@ -641,7 +641,7 @@ STAGE PLANS:
 className: VectorSelectOperator
 native: true
 projectedOutputColumnNums: [1, 2]
-Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: int)
 sort order: +
@@ -650,7 +650,7 @@ STAGE PLANS:
 className: VectorReduceSinkLongOperator
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col1 (type: char(2))
 Execution mode: vectorized, llap
 LLAP IO: all inputs
@@ -709,7 +709,7 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: tjoin1
-Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE
 TableScan Vectorization:
 native: true
 Select Operator
@@ -719,7 +719,7 @@ STAGE PLANS:
 className: VectorSelectOperator
 native: true
 projectedOutputColumnNums: [0, 1, 2]
-Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE
 Map Join Operator
 condition map:
 Left Outer Join 0 to 1
@@ -736,7 +736,7 @@ STAGE PLANS:
 outputColumnNames: _col0, _col1, _col2, _col4
 input vertices:
 1 Map 2
-Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col4 (type: char(2))
 outputColumnNames: _col0, _col1, _col2, _col3
@@ -744,13 +744,13 @@ STAGE PLANS:
 className: VectorSelectOperator
 native: true
 projectedOutputColumnNums: [0, 1, 2, 4]
-Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
 File Sink Vectorization:
 className: VectorFileSinkOperator
 native: false
-Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -770,7 +770,7 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: tjoin2
-Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE
 TableScan Vectorization:
 native: true
 Select Operator
@@ -780,7 +780,7 @@ STAGE PLANS:
 className: VectorSelectOperator
 native: true
 projectedOutputColumnNums: [1, 2]
-Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: int)
 sort order: +
@@ -789,7 +789,7 @@ STAGE PLANS:
 className: VectorReduceSinkLongOperator
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col1 (type: char(2))
 Execution mode: vectorized, llap
 LLAP IO: all inputs
diff --git ql/src/test/results/clientpositive/llap/vector_outer_join0.q.out ql/src/test/results/clientpositive/llap/vector_outer_join0.q.out
index 8b9c940ef7..a6e7e709f8 100644
--- ql/src/test/results/clientpositive/llap/vector_outer_join0.q.out
+++ ql/src/test/results/clientpositive/llap/vector_outer_join0.q.out
@@ -84,7 +84,7 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: t1
-Statistics: Num rows: 6 Data size: 1128 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 6 Data size: 554 Basic stats: COMPLETE Column stats: COMPLETE
 TableScan Vectorization:
 native: true
 vectorizationSchemaColumns: [0:v1:string, 1:a:int, 2:ROW__ID:struct]
@@ -95,7 +95,7 @@ STAGE PLANS:
 className: VectorSelectOperator
 native: true
 projectedOutputColumnNums: [0, 1]
-Statistics: Num rows: 6 Data size: 1128 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 6 Data size: 554 Basic stats: COMPLETE Column stats: COMPLETE
 Map Join Operator
 condition map:
 Left Outer Join 0 to 1
@@ -115,13 +115,13 @@ STAGE PLANS:
 outputColumnNames: _col0, _col1, _col2, _col3
 input vertices:
 1 Map 2
-Statistics: Num rows: 6 Data size: 1240 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 9 Data size: 1666 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
 File Sink Vectorization:
 className: VectorFileSinkOperator
 native: false
-Statistics: Num rows: 6 Data size: 1240 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 9 Data size: 1666 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -147,7 +147,7 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: t2
-Statistics: Num rows: 6 Data size: 1128 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 6 Data size: 554 Basic stats: COMPLETE Column stats: COMPLETE
 TableScan Vectorization:
 native: true
 vectorizationSchemaColumns: [0:c:int, 1:v2:string, 2:ROW__ID:struct]
@@ -158,7 +158,7 @@ STAGE PLANS:
 className: VectorSelectOperator
 native: true
 projectedOutputColumnNums: [0, 1]
-Statistics: Num rows: 6 Data size: 1128 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 6 Data size: 554 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: int)
 sort order: +
@@ -169,7 +169,7 @@ STAGE PLANS:
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
 valueColumnNums: [1]
-Statistics: Num rows: 6 Data size: 1128 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 6 Data size: 554 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col1 (type: string)
 Execution mode: vectorized, llap
 LLAP IO: all inputs
@@ -237,7 +237,7 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: t1
-Statistics: Num rows: 6 Data size: 1128 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 6 Data size: 554 Basic stats: COMPLETE Column stats: COMPLETE
 TableScan Vectorization:
 native: true
 vectorizationSchemaColumns: [0:v1:string, 1:a:int, 2:ROW__ID:struct]
@@ -248,7 +248,7 @@ STAGE PLANS:
 className: VectorSelectOperator
 native: true
 projectedOutputColumnNums: [0, 1]
-Statistics: Num rows: 6 Data size: 1128 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 6 Data size: 554 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col1 (type: int)
 sort order: +
@@ -259,7 +259,7 @@ STAGE PLANS:
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
 valueColumnNums: [0]
-Statistics: Num rows: 6 Data size: 1128 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 6 Data size: 554 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: string)
 Execution mode: vectorized, llap
 LLAP IO: all inputs
@@ -282,7 +282,7 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: t2
-Statistics: Num rows: 6 Data size: 1128 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 6 Data size: 554 Basic stats: COMPLETE Column stats: COMPLETE
 TableScan Vectorization:
 native: true
 vectorizationSchemaColumns: [0:c:int, 1:v2:string, 2:ROW__ID:struct]
@@ -293,7 +293,7 @@ STAGE PLANS:
 className: VectorSelectOperator
 native: true
projectedOutputColumnNums: [0, 1]
- Statistics: Num rows: 6 Data size: 1128 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 554 Basic stats: COMPLETE Column stats: COMPLETE
Map Join Operator
condition map:
Right Outer Join 0 to 1
@@ -313,13 +313,13 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3
input vertices:
0 Map 1
- Statistics: Num rows: 6 Data size: 1240 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 9 Data size: 1666 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 6 Data size: 1240 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 9 Data size: 1666 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/llap/vector_partition_diff_num_cols.q.out ql/src/test/results/clientpositive/llap/vector_partition_diff_num_cols.q.out
index 37870bc23f..068453f068 100644
--- ql/src/test/results/clientpositive/llap/vector_partition_diff_num_cols.q.out
+++ ql/src/test/results/clientpositive/llap/vector_partition_diff_num_cols.q.out
@@ -98,7 +98,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: inventory_part_0
- Statistics: Num rows: 200 Data size: 3940 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 200 Data size: 780 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
Select Operator
@@ -108,7 +108,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [3]
- Statistics: Num rows: 200 Data size: 3940 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 200 Data size: 780 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: sum(inv_quantity_on_hand)
Group By Vectorization:
@@ -120,14 +120,14 @@ STAGE PLANS:
projectedOutputColumnNums: [0]
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
sort order:
Reduce Sink Vectorization:
className: VectorReduceSinkEmptyKeyOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -160,13 +160,13 @@ STAGE PLANS:
projectedOutputColumnNums: [0]
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -268,7 +268,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: inventory_part_1
- Statistics: Num rows: 200 Data size: 12640 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 200 Data size: 780 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
Select Operator
@@ -278,7 +278,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [3]
- Statistics: Num rows: 200 Data size: 12640 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 200 Data size: 780 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: sum(inv_quantity_on_hand)
Group By Vectorization:
@@ -290,14 +290,14 @@ STAGE PLANS:
projectedOutputColumnNums: [0]
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
sort order:
Reduce Sink Vectorization:
className: VectorReduceSinkEmptyKeyOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -330,13 +330,13 @@ STAGE PLANS:
projectedOutputColumnNums: [0]
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -438,7 +438,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: inventory_part_2a
- Statistics: Num rows: 200 Data size: 3940 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 200 Data size: 780 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
Select Operator
@@ -448,7 +448,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [3]
- Statistics: Num rows: 200 Data size: 3940 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 200 Data size: 780 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: sum(inv_quantity_on_hand)
Group By Vectorization:
@@ -460,14 +460,14 @@ STAGE PLANS:
projectedOutputColumnNums: [0]
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
sort order:
Reduce Sink Vectorization:
className: VectorReduceSinkEmptyKeyOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -500,13 +500,13 @@ STAGE PLANS:
projectedOutputColumnNums: [0]
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -595,7 +595,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: inventory_part_2b
- Statistics: Num rows: 200 Data size: 3940 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 200 Data size: 792 Basic stats: COMPLETE Column stats: PARTIAL
TableScan Vectorization:
native: true
Select Operator
@@ -605,7 +605,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [3]
- Statistics: Num rows: 200 Data size: 3940 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 200 Data size: 792 Basic stats: COMPLETE Column stats: PARTIAL
Group By Operator
aggregations: sum(inv_quantity_on_hand)
Group By Vectorization:
@@ -617,14 +617,14 @@ STAGE PLANS:
projectedOutputColumnNums: [0]
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL
Reduce Output Operator
sort order:
Reduce Sink Vectorization:
className: VectorReduceSinkEmptyKeyOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL
value expressions: _col0 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -657,13 +657,13 @@ STAGE PLANS:
projectedOutputColumnNums: [0]
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -752,7 +752,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: inventory_part_3
- Statistics: Num rows: 200 Data size: 3940 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 200 Data size: 780 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
Select Operator
@@ -762,7 +762,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [3]
- Statistics: Num rows: 200 Data size: 3940 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 200 Data size: 780 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: sum(inv_quantity_on_hand)
Group By Vectorization:
@@ -774,14 +774,14 @@ STAGE PLANS:
projectedOutputColumnNums: [0]
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
sort order:
Reduce Sink Vectorization:
className: VectorReduceSinkEmptyKeyOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -814,13 +814,13 @@ STAGE PLANS:
projectedOutputColumnNums: [0]
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out
index 1fb6e9d48c..687b4af9b1 100644
--- ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out
+++ ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out
@@ -943,7 +943,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: flights_tiny_orc_partitioned_date
- Statistics: Num rows: 137 Data size: 94360 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
Select Operator
@@ -953,13 +953,13 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 2, 3, 4, 5]
- Statistics: Num rows: 137 Data size: 62584 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 137 Data size: 62584 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1178,7 +1178,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: flights_tiny_orc_partitioned_date
- Statistics: Num rows: 137 Data size: 94360 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
Select Operator
@@ -1188,7 +1188,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 2, 3, 4, 5]
- Statistics: Num rows: 137 Data size: 94360 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col4 (type: int), _col5 (type: date)
sort order: ++
@@ -1196,7 +1196,7 @@ STAGE PLANS:
className: VectorReduceSinkObjectHashOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 137 Data size: 94360 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.1
value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: timestamp), _col3 (type: float)
Execution mode: vectorized, llap
@@ -1226,13 +1226,13 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [2, 3, 4, 5, 0, 1]
- Statistics: Num rows: 137 Data size: 62584 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 25
Limit Vectorization:
className: VectorLimitOperator
native: true
- Statistics: Num rows: 25 Data size: 11800 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col4 (type: int), _col5 (type: date)
sort order: ++
@@ -1240,7 +1240,7 @@ STAGE PLANS:
className: VectorReduceSinkObjectHashOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 25 Data size: 11800 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.1
value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: timestamp), _col3 (type: float)
Reducer 3
@@ -1259,19 +1259,19 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [2, 3, 4, 5, 0, 1]
- Statistics: Num rows: 25 Data size: 11800 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 25
Limit Vectorization:
className: VectorLimitOperator
native: true
- Statistics: Num rows: 25 Data size: 11800 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 25 Data size: 11800 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -3368,7 +3368,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: flights_tiny_parquet_partitioned_date
- Statistics: Num rows: 137 Data size: 63269 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
Select Operator
@@ -3378,13 +3378,13 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 2, 3, 4, 5]
- Statistics: Num rows: 137 Data size: 62584 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 137 Data size: 62584 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -3603,7 +3603,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: flights_tiny_parquet_partitioned_date
- Statistics: Num rows: 137 Data size: 63269 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
Select Operator
@@ -3613,7 +3613,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 2, 3, 4, 5]
- Statistics: Num rows: 137 Data size: 63269 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col4 (type: int), _col5 (type: date)
sort order: ++
@@ -3621,7 +3621,7 @@ STAGE PLANS:
className: VectorReduceSinkObjectHashOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 137 Data size: 63269 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.1
value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: timestamp), _col3 (type: float)
Execution mode: vectorized, llap
@@ -3651,13 +3651,13 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [2, 3, 4, 5, 0, 1]
- Statistics: Num rows: 137 Data size: 62584 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 25
Limit Vectorization:
className: VectorLimitOperator
native: true
- Statistics: Num rows: 25 Data size: 11800 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col4 (type: int), _col5 (type: date)
sort order: ++
@@ -3665,7 +3665,7 @@ STAGE PLANS:
className: VectorReduceSinkObjectHashOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 25 Data size: 11800 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.1
value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: timestamp), _col3 (type: float)
Reducer 3
@@ -3684,19 +3684,19 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [2, 3, 4, 5, 0, 1]
- Statistics: Num rows: 25 Data size: 11800 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 25
Limit Vectorization:
className: VectorLimitOperator
native: true
- Statistics: Num rows: 25 Data size: 11800 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 25 Data size: 11800 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/llap/vector_ptf_part_simple.q.out ql/src/test/results/clientpositive/llap/vector_ptf_part_simple.q.out
index 01e9c5bb14..5eaed53adb 100644
--- ql/src/test/results/clientpositive/llap/vector_ptf_part_simple.q.out
+++ ql/src/test/results/clientpositive/llap/vector_ptf_part_simple.q.out
@@ -132,7 +132,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: vector_ptf_part_simple_orc
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:p_mfgr:string, 1:p_name:string, 2:p_retailprice:double, 3:ROW__ID:struct]
@@ -146,7 +146,7 @@ STAGE PLANS:
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
valueColumnNums: [1, 2]
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: p_name (type: string), p_retailprice (type: double)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -188,7 +188,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 2]
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
@@ -258,7 +258,7 @@ STAGE PLANS:
outputColumns: [3, 4, 5, 6, 7, 8, 9, 0, 1, 2]
outputTypes: [int, int, int, double, double, bigint, bigint, string, string, double]
streamingColumns: [3, 4, 5, 6]
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), row_number_window_0 (type: int), rank_window_1 (type: int), dense_rank_window_2 (type: int), first_value_window_3 (type: double), last_value_window_4 (type: double), count_window_5 (type: bigint), count_window_6 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
@@ -266,13 +266,13 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 10824 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 10824 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -392,7 +392,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: vector_ptf_part_simple_orc
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:p_mfgr:string, 1:p_name:string, 2:p_retailprice:double, 3:ROW__ID:struct]
@@ -406,7 +406,7 @@ STAGE PLANS:
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
valueColumnNums: [1, 2]
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: p_name (type: string), p_retailprice (type: double)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -436,7 +436,7 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: double)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
@@ -494,14 +494,14 @@ STAGE PLANS:
window function: GenericUDAFCountEvaluator
window frame: RANGE PRECEDING(MAX)~CURRENT
isStar: true
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), row_number_window_0 (type: int), rank_window_1 (type: int), dense_rank_window_2 (type: int), first_value_window_3 (type: double), last_value_window_4 (type: double), count_window_5 (type: bigint), count_window_6 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 10824 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 10824 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -621,7 +621,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: vector_ptf_part_simple_orc
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:p_mfgr:string, 1:p_name:string, 2:p_retailprice:double, 3:ROW__ID:struct]
@@ -635,7 +635,7 @@ STAGE PLANS:
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
valueColumnNums: [1, 2]
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: p_name (type: string), p_retailprice (type: double)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -665,7 +665,7 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: double)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
@@ -723,14 +723,14 @@ STAGE PLANS:
window function: GenericUDAFCountEvaluator
window frame: ROWS PRECEDING(MAX)~CURRENT
isStar: true
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), row_number_window_0 (type: int), rank_window_1 (type: int), dense_rank_window_2 (type: int), first_value_window_3 (type: double), last_value_window_4 (type: double), count_window_5 (type: bigint), count_window_6 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 10824 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 10824 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -850,7 +850,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: vector_ptf_part_simple_orc
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:p_mfgr:string, 1:p_name:string, 2:p_retailprice:double, 3:ROW__ID:struct]
@@ -865,7 +865,7 @@ STAGE PLANS:
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
partitionColumnNums: [0]
valueColumnNums: [2]
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: p_retailprice (type: double)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -907,7 +907,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 2]
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
@@ -978,7 +978,7 @@ STAGE PLANS:
outputTypes: [int, int, int, double, double, bigint, bigint, string, string, double]
partitionExpressions: [col 0:string]
streamingColumns: [3, 4, 5, 6]
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), row_number_window_0 (type: int), rank_window_1 (type: int), dense_rank_window_2 (type: int), first_value_window_3 (type: double), last_value_window_4 (type: double), count_window_5 (type: bigint), count_window_6 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
@@ -986,13 +986,13 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 10824 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 10824 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1112,7 +1112,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: vector_ptf_part_simple_orc
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:p_mfgr:string, 1:p_name:string, 2:p_retailprice:double, 3:ROW__ID:struct]
@@ -1127,7 +1127,7 @@ STAGE PLANS:
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
partitionColumnNums: [0]
valueColumnNums: [2]
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: p_retailprice (type: double)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -1157,7 +1157,7 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: double)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
@@ -1215,14 +1215,14 @@ STAGE PLANS:
window function: GenericUDAFCountEvaluator
window frame: RANGE PRECEDING(MAX)~CURRENT
isStar: true
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), row_number_window_0 (type: int), rank_window_1 (type: int), dense_rank_window_2 (type: int), first_value_window_3 (type: double), last_value_window_4 (type: double), count_window_5 (type: bigint), count_window_6 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 10824 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 10824 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1342,7 +1342,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: vector_ptf_part_simple_orc
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:p_mfgr:string, 1:p_name:string, 2:p_retailprice:double, 3:ROW__ID:struct]
@@ -1357,7 +1357,7 @@ STAGE PLANS:
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
partitionColumnNums: [0]
valueColumnNums: [2]
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: p_retailprice (type: double)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -1387,7 +1387,7 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: double)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
@@ -1445,14 +1445,14 @@ STAGE PLANS:
window function: GenericUDAFCountEvaluator
window frame: ROWS PRECEDING(MAX)~CURRENT
isStar: true
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), row_number_window_0 (type: int), rank_window_1 (type: int), dense_rank_window_2 (type: int), first_value_window_3 (type: double), last_value_window_4 (type: double), count_window_5 (type: bigint), count_window_6 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 10824 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 10824 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1572,7 +1572,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: vector_ptf_part_simple_orc
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:p_mfgr:string, 1:p_name:string, 2:p_retailprice:double, 3:ROW__ID:struct]
@@ -1588,7 +1588,7 @@ STAGE PLANS:
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
partitionColumnNums: [5]
valueColumnNums: [0, 2]
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: p_mfgr (type: string), p_retailprice (type: double)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -1630,7 +1630,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [2, 1, 3]
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
@@ -1701,7 +1701,7 @@ STAGE PLANS:
outputTypes: [int, int, int, double, double, bigint, bigint, string, string, double]
partitionExpressions: [ConstantVectorExpression(val 0) -> 11:int]
streamingColumns: [4, 5, 6, 7]
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), row_number_window_0 (type: int), rank_window_1 (type: int), dense_rank_window_2 (type: int), first_value_window_3 (type: double), last_value_window_4 (type: double), count_window_5 (type: bigint), count_window_6 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
@@ -1709,13 +1709,13 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [2, 1, 3, 4, 5, 6, 7, 8, 9, 10]
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 10824 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 10824 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1835,7 +1835,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: vector_ptf_part_simple_orc
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:p_mfgr:string, 1:p_name:string, 2:p_retailprice:double, 3:ROW__ID:struct]
@@ -1851,7 +1851,7 @@ STAGE PLANS:
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
partitionColumnNums: [5]
valueColumnNums: [0, 2]
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: p_mfgr (type: string), p_retailprice (type: double)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -1881,7 +1881,7 @@ STAGE PLANS:
Select Operator
expressions: VALUE._col0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col1 (type: double)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
@@ -1939,14 +1939,14 @@ STAGE PLANS:
window function: GenericUDAFCountEvaluator
window frame: RANGE PRECEDING(MAX)~CURRENT
isStar: true
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), row_number_window_0 (type: int), rank_window_1 (type: int), dense_rank_window_2 (type: int), first_value_window_3 (type: double), last_value_window_4 (type: double), count_window_5 (type: bigint), count_window_6 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 10824 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 10824 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -2066,7 +2066,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: vector_ptf_part_simple_orc
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:p_mfgr:string, 1:p_name:string, 2:p_retailprice:double, 3:ROW__ID:struct]
@@ -2082,7 +2082,7 @@ STAGE PLANS:
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
partitionColumnNums: [5]
valueColumnNums: [0, 2]
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: p_mfgr (type: string), p_retailprice (type: double)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -2112,7 +2112,7 @@ STAGE PLANS:
Select Operator
expressions: VALUE._col0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col1 (type: double)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
@@ -2170,14 +2170,14 @@ STAGE PLANS:
window function: GenericUDAFCountEvaluator
window frame: ROWS PRECEDING(MAX)~CURRENT
isStar: true
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), row_number_window_0 (type: int), rank_window_1 (type: int), dense_rank_window_2 (type: int), first_value_window_3 (type: double), last_value_window_4 (type: double), count_window_5 (type: bigint), count_window_6 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 10824 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 10824 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -2291,7 +2291,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: vector_ptf_part_simple_orc
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:p_mfgr:string, 1:p_name:string, 2:p_retailprice:double, 3:ROW__ID:struct]
@@ -2305,7 +2305,7 @@ STAGE PLANS:
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
valueColumnNums: [1, 2]
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: p_name (type: string), p_retailprice (type: double)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -2347,7 +2347,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 2]
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
@@ -2397,7 +2397,7 @@ STAGE PLANS:
outputColumns: [3, 4, 5, 6, 0, 1, 2]
outputTypes: [double, double, double, double, string, string, double]
streamingColumns: []
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), sum_window_0 (type: double), min_window_1 (type: double), max_window_2 (type: double), avg_window_3 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
@@ -2405,13 +2405,13 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6]
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 10344 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 10344 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -2519,7 +2519,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: vector_ptf_part_simple_orc
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:p_mfgr:string, 1:p_name:string, 2:p_retailprice:double, 3:ROW__ID:struct]
@@ -2533,7 +2533,7 @@ STAGE PLANS:
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
valueColumnNums: [1, 2]
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: p_name (type: string), p_retailprice (type: double)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -2575,7 +2575,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 2]
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
@@ -2625,7 +2625,7 @@ STAGE PLANS:
outputColumns: [3, 4, 5, 6, 0, 1, 2]
outputTypes: [double, double, double, double, string, string, double]
streamingColumns: []
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), sum_window_0 (type: double), min_window_1 (type: double), max_window_2 (type: double), avg_window_3 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
@@ -2633,13 +2633,13 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6]
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 10344 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 10344 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -2747,7 +2747,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: vector_ptf_part_simple_orc
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:p_mfgr:string, 1:p_name:string, 2:p_retailprice:double, 3:ROW__ID:struct]
@@ -2761,7 +2761,7 @@ STAGE PLANS:
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
valueColumnNums: [1, 2]
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: p_name (type: string), p_retailprice (type: double)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -2791,7 +2791,7 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: double)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
@@ -2829,14 +2829,14 @@ STAGE PLANS:
name: avg
window function: GenericUDAFAverageEvaluatorDouble
window frame: ROWS PRECEDING(MAX)~CURRENT
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), sum_window_0 (type: double), min_window_1 (type: double), max_window_2 (type: double), avg_window_3 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 10344 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 10344 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -2944,7 +2944,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: vector_ptf_part_simple_orc
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:p_mfgr:string, 1:p_name:string, 2:p_retailprice:double, 3:ROW__ID:struct]
@@ -2959,7 +2959,7 @@ STAGE PLANS:
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
partitionColumnNums: [0]
valueColumnNums: [2]
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: p_retailprice (type: double)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -3001,7 +3001,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 2]
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
@@ -3052,7 +3052,7 @@ STAGE PLANS:
outputTypes: [double, double, double, double, string, string, double]
partitionExpressions: [col 0:string]
streamingColumns: []
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), sum_window_0 (type: double), min_window_1 (type: double), max_window_2 (type: double), avg_window_3 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
@@ -3060,13 +3060,13 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6]
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 10344 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 10344 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -3174,7 +3174,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: vector_ptf_part_simple_orc
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:p_mfgr:string, 1:p_name:string, 2:p_retailprice:double, 3:ROW__ID:struct]
@@ -3189,7 +3189,7 @@ STAGE PLANS:
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
partitionColumnNums: [0]
valueColumnNums: [2]
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: p_retailprice (type: double)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -3449,7 +3449,7 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: double)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
@@ -3487,14 +3487,14 @@ STAGE PLANS:
name: avg
window function: GenericUDAFAverageEvaluatorDouble
window frame: ROWS PRECEDING(MAX)~CURRENT
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), sum_window_0 (type: double), min_window_1 (type: double), max_window_2 (type: double), avg_window_3 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 10344 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 10344 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format:
org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3602,7 +3602,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: vector_ptf_part_simple_orc - Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:p_mfgr:string, 1:p_name:string, 2:p_retailprice:double, 3:ROW__ID:struct] @@ -3618,7 +3618,7 @@ STAGE PLANS: nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true partitionColumnNums: [5] valueColumnNums: [0, 2] - Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_mfgr (type: string), p_retailprice (type: double) Execution mode: vectorized, llap LLAP IO: all inputs @@ -3660,7 +3660,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [2, 1, 3] - Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -3711,7 +3711,7 @@ STAGE PLANS: outputTypes: [double, double, double, double, string, string, double] partitionExpressions: [ConstantVectorExpression(val 0) -> 8:int] streamingColumns: [] - Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), sum_window_0 (type: double), min_window_1 (type: double), max_window_2 (type: double), avg_window_3 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 @@ -3719,13 +3719,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [2, 1, 3, 4, 5, 6, 7] - Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 10344 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 10344 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3833,7 +3833,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: vector_ptf_part_simple_orc - Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:p_mfgr:string, 1:p_name:string, 2:p_retailprice:double, 3:ROW__ID:struct] @@ -3849,7 +3849,7 @@ STAGE PLANS: nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS 
true, LazyBinarySerDe for values IS true partitionColumnNums: [5] valueColumnNums: [0, 2] - Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_mfgr (type: string), p_retailprice (type: double) Execution mode: vectorized, llap LLAP IO: all inputs @@ -3891,7 +3891,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [2, 1, 3] - Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -3942,7 +3942,7 @@ STAGE PLANS: outputTypes: [double, double, double, double, string, string, double] partitionExpressions: [ConstantVectorExpression(val 0) -> 8:int] streamingColumns: [] - Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), sum_window_0 (type: double), min_window_1 (type: double), max_window_2 (type: double), avg_window_3 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 @@ -3950,13 +3950,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [2, 1, 3, 4, 5, 6, 7] - Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 10344 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 10344 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4064,7 +4064,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: vector_ptf_part_simple_orc - Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:p_mfgr:string, 1:p_name:string, 2:p_retailprice:double, 3:ROW__ID:struct] @@ -4080,7 +4080,7 @@ STAGE PLANS: nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true partitionColumnNums: [5] valueColumnNums: [0, 2] - Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_mfgr (type: string), p_retailprice (type: double) Execution mode: vectorized, llap LLAP IO: all inputs @@ -4110,7 +4110,7 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col1 (type: double) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function 
definitions: Input definition @@ -4148,14 +4148,14 @@ STAGE PLANS: name: avg window function: GenericUDAFAverageEvaluatorDouble window frame: ROWS PRECEDING(MAX)~CURRENT - Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), sum_window_0 (type: double), min_window_1 (type: double), max_window_2 (type: double), avg_window_3 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 10344 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 10344 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4305,7 +4305,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: vector_ptf_part_simple_orc_decimal - Statistics: Num rows: 40 Data size: 18720 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 12944 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:p_mfgr:string, 1:p_name:string, 2:p_retailprice:decimal(38,18), 3:ROW__ID:struct] @@ -4319,7 +4319,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [1, 2] - Statistics: Num rows: 40 Data size: 18720 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 12944 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_name (type: string), p_retailprice (type: decimal(38,18)) Execution mode: vectorized, llap LLAP IO: all inputs @@ -4361,7 +4361,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2] - Statistics: Num rows: 40 Data size: 18720 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 23664 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -4411,7 +4411,7 @@ STAGE PLANS: outputColumns: [3, 4, 5, 6, 0, 1, 2] outputTypes: [decimal(38,18), decimal(38,18), decimal(38,18), decimal(38,18), string, string, decimal(38,18)] streamingColumns: [] - Statistics: Num rows: 40 Data size: 18720 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 23664 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: decimal(38,18)), sum_window_0 (type: decimal(38,18)), min_window_1 (type: decimal(38,18)), max_window_2 (type: decimal(38,18)), avg_window_3 (type: decimal(38,18)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 @@ -4419,13 +4419,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] - Statistics: Num rows: 40 Data size: 18720 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 30416 Basic stats: COMPLETE Column stats: COMPLETE 
File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 40 Data size: 18720 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 30416 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4533,7 +4533,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: vector_ptf_part_simple_orc_decimal - Statistics: Num rows: 40 Data size: 18720 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 12944 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:p_mfgr:string, 1:p_name:string, 2:p_retailprice:decimal(38,18), 3:ROW__ID:struct] @@ -4548,7 +4548,7 @@ STAGE PLANS: nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true partitionColumnNums: [0] valueColumnNums: [2] - Statistics: Num rows: 40 Data size: 18720 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 12944 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_retailprice (type: decimal(38,18)) Execution mode: vectorized, llap LLAP IO: all inputs @@ -4590,7 +4590,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2] - Statistics: Num rows: 40 Data size: 18720 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 23664 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -4641,7 +4641,7 @@ STAGE PLANS: outputTypes: [decimal(38,18), decimal(38,18), decimal(38,18), decimal(38,18), string, string, decimal(38,18)] partitionExpressions: [col 0:string] streamingColumns: [] - Statistics: Num rows: 40 Data size: 18720 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 23664 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: decimal(38,18)), sum_window_0 (type: decimal(38,18)), min_window_1 (type: decimal(38,18)), max_window_2 (type: decimal(38,18)), avg_window_3 (type: decimal(38,18)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 @@ -4649,13 +4649,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] - Statistics: Num rows: 40 Data size: 18720 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 30416 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 40 Data size: 18720 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 30416 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4783,7 +4783,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: vector_ptf_part_simple_orc_long - Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: 
native: true vectorizationSchemaColumns: [0:p_mfgr:string, 1:p_name:string, 2:p_bigint:bigint, 3:ROW__ID:struct] @@ -4797,7 +4797,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [1, 2] - Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_name (type: string), p_bigint (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs @@ -4839,7 +4839,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2] - Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -4889,7 +4889,7 @@ STAGE PLANS: outputColumns: [3, 4, 5, 6, 0, 1, 2] outputTypes: [bigint, bigint, bigint, double, string, string, bigint] streamingColumns: [] - Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint), sum_window_0 (type: bigint), min_window_1 (type: bigint), max_window_2 (type: bigint), avg_window_3 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 @@ -4897,13 +4897,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] - Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 10344 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 10344 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -5011,7 +5011,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: vector_ptf_part_simple_orc_long - Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:p_mfgr:string, 1:p_name:string, 2:p_bigint:bigint, 3:ROW__ID:struct] @@ -5026,7 +5026,7 @@ STAGE PLANS: nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true partitionColumnNums: [0] valueColumnNums: [2] - Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_bigint (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs @@ -5068,7 +5068,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2] 
- Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -5119,7 +5119,7 @@ STAGE PLANS: outputTypes: [bigint, bigint, bigint, double, string, string, bigint] partitionExpressions: [col 0:string] streamingColumns: [] - Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint), sum_window_0 (type: bigint), min_window_1 (type: bigint), max_window_2 (type: bigint), avg_window_3 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 @@ -5127,13 +5127,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] - Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 10344 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 10344 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -5235,7 +5235,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: vector_ptf_part_simple_orc - Statistics: Num rows: 40 Data size: 7488 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 4216 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:p_mfgr:string, 1:p_name:string, 2:p_retailprice:double, 3:ROW__ID:struct] @@ -5249,7 +5249,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [2] - Statistics: Num rows: 40 Data size: 7488 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 4216 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_retailprice (type: double) Execution mode: vectorized, llap LLAP IO: all inputs @@ -5291,7 +5291,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 40 Data size: 7488 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 14936 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -5324,7 +5324,7 @@ STAGE PLANS: outputColumns: [2, 0, 1] outputTypes: [int, string, double] streamingColumns: [2] - Statistics: Num rows: 40 Data size: 7488 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 14936 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col2 (type: double), rank_window_0 (type: int) outputColumnNames: _col0, _col1, _col2 @@ -5332,13 +5332,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2] - Statistics: Num rows: 40 Data size: 7488 Basic stats: COMPLETE Column stats: NONE + 
Statistics: Num rows: 40 Data size: 4344 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 40 Data size: 7488 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 4344 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -5434,7 +5434,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: vector_ptf_part_simple_orc - Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:p_mfgr:string, 1:p_name:string, 2:p_retailprice:double, 3:ROW__ID:struct] @@ -5449,7 +5449,7 @@ STAGE PLANS: nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true partitionColumnNums: [0] valueColumnNums: [2] - Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_retailprice (type: double) Execution mode: vectorized, llap LLAP IO: all inputs @@ -5491,7 +5491,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2] - Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -5525,7 +5525,7 @@ STAGE PLANS: outputTypes: [int, string, string, double] partitionExpressions: [col 0:string] streamingColumns: [3] - Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col2 (type: double), rank_window_0 (type: int) outputColumnNames: _col0, _col1, _col2 @@ -5533,13 +5533,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 2, 3] - Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 4344 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 4344 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -5635,7 +5635,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: vector_ptf_part_simple_orc - Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:p_mfgr:string, 1:p_name:string, 2:p_retailprice:double, 3:ROW__ID:struct] @@ -5650,7 +5650,7 @@ STAGE PLANS: native: true nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [1, 2] - Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_name (type: string), p_retailprice (type: double) Execution mode: vectorized, llap LLAP IO: all inputs @@ -5680,7 +5680,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: double) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -5701,14 +5701,14 @@ STAGE PLANS: window function: GenericUDAFRankEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true - Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), rank_window_0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 9224 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 9224 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -5804,7 +5804,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: vector_ptf_part_simple_orc - Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:p_mfgr:string, 1:p_name:string, 2:p_retailprice:double, 3:ROW__ID:struct] @@ -5820,7 +5820,7 @@ STAGE PLANS: nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true partitionColumnNums: [0, 9] valueColumnNums: [2] - Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_retailprice (type: double) Execution mode: vectorized, llap LLAP IO: all inputs @@ -5862,7 +5862,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 2, 3] - Statistics: Num rows: 40 Data size: 14664 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -5896,7 +5896,7 @@ STAGE PLANS: outputTypes: [int, string, string, double] partitionExpressions: [col 0:string, IfExprColumnNull(col 5:boolean, col 6:timestamp, null)(children: 
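Every hunk in the vector_ptf section above is the same mechanical change: with hive.stats.column.autogather now defaulting to true, plans are costed from per-column statistics (Column stats: COMPLETE) instead of the raw-file-size heuristic (Column stats: NONE), so Num rows stays put while the Data size estimates move. A minimal HiveQL sketch for observing the effect by hand, not part of the patch; the table and column names are illustrative:

    -- assumes a build with this patch (or SET the flag explicitly)
    SET hive.stats.column.autogather=true;
    CREATE TABLE stats_demo (k INT, v STRING) STORED AS ORC;
    INSERT INTO stats_demo VALUES (1, 'a'), (2, 'b');
    -- the INSERT plan carries the extra compute_stats(..., 'hll') branch,
    -- so column statistics land in the metastore as a side effect:
    DESCRIBE FORMATTED stats_demo k;
    -- follow-up plans should now report "Column stats: COMPLETE"
    EXPLAIN SELECT k, v FROM stats_demo WHERE k = 1;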
diff --git ql/src/test/results/clientpositive/llap/vector_string_concat.q.out ql/src/test/results/clientpositive/llap/vector_string_concat.q.out
index 492dfd0f27..2ceef58012 100644
--- ql/src/test/results/clientpositive/llap/vector_string_concat.q.out
+++ ql/src/test/results/clientpositive/llap/vector_string_concat.q.out
@@ -122,7 +122,7 @@ STAGE PLANS:
        Map Operator Tree:
          TableScan
            alias: over1korc
-           Statistics: Num rows: 1049 Data size: 183632 Basic stats: COMPLETE Column stats: NONE
+           Statistics: Num rows: 1049 Data size: 101753 Basic stats: COMPLETE Column stats: COMPLETE
            TableScan Vectorization:
              native: true
            Select Operator
@@ -133,19 +133,19 @@ STAGE PLANS:
              native: true
              projectedOutputColumnNums: [7, 13, 12]
              selectExpressions: StringGroupColConcatStringScalar(col 12:string, val )(children: StringScalarConcatStringGroupCol(val , col 7:string) -> 12:string) -> 13:string, StringGroupColConcatStringScalar(col 14:string, val |)(children: StringScalarConcatStringGroupCol(val |, col 12:string)(children: StringRTrim(col 14:string)(children: StringGroupColConcatStringScalar(col 12:string, val )(children: StringScalarConcatStringGroupCol(val , col 7:string) -> 12:string) -> 14:string) -> 12:string) -> 14:string) -> 12:string
-           Statistics: Num rows: 1049 Data size: 183632 Basic stats: COMPLETE Column stats: NONE
+           Statistics: Num rows: 1049 Data size: 487785 Basic stats: COMPLETE Column stats: COMPLETE
            Limit
              Number of rows: 20
              Limit Vectorization:
                className: VectorLimitOperator
                native: true
-             Statistics: Num rows: 20 Data size: 3500 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 20 Data size: 9300 Basic stats: COMPLETE Column stats: COMPLETE
            File Output Operator
              compressed: false
              File Sink Vectorization:
                className: VectorFileSinkOperator
                native: false
-             Statistics: Num rows: 20 Data size: 3500 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 20 Data size: 9300 Basic stats: COMPLETE Column stats: COMPLETE
              table:
                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
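The vector_string_concat estimates move in both directions, which is the expected shape of this change rather than a regression: the TableScan shrinks (183632 -> 101753) because observed average column lengths replace the file-size heuristic, while the Select grows (-> 487785) because the projected concat expressions are now sized from those per-column averages. A hedged sketch of the manual route to the same statistics; the query is paraphrased from the test (assuming over1korc's string column is s, as in other over1k-based tests), not copied verbatim:

    -- ANALYZE is the manual equivalent of autogather for an existing table
    ANALYZE TABLE over1korc COMPUTE STATISTICS FOR COLUMNS;
    EXPLAIN
    SELECT concat(concat('      ', s), '      ')
    FROM over1korc
    LIMIT 20;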
diff --git ql/src/test/results/clientpositive/llap/vector_struct_in.q.out ql/src/test/results/clientpositive/llap/vector_struct_in.q.out
index 01be8d19a1..c2b3af7551 100644
--- ql/src/test/results/clientpositive/llap/vector_struct_in.q.out
+++ ql/src/test/results/clientpositive/llap/vector_struct_in.q.out
@@ -59,7 +59,7 @@ STAGE PLANS:
        Map Operator Tree:
          TableScan
            alias: test_1
-           Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
+           Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: COMPLETE
            TableScan Vectorization:
              native: true
            Filter Operator
@@ -68,7 +68,7 @@ STAGE PLANS:
                native: true
                predicateExpression: FilterStructColumnInList(structExpressions [col 0:string, col 1:string], fieldVectorColumnTypes [BYTES, BYTES], structColumnMap [0, 1])
              predicate: (struct(id,lineid)) IN (const struct('two','3'), const struct('three','1'), const struct('one','1'), const struct('five','2'), const struct('six','1'), const struct('eight','1'), const struct('seven','1'), const struct('nine','1'), const struct('ten','1')) (type: boolean)
-             Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: COMPLETE
              Select Operator
                expressions: id (type: string), lineid (type: string)
                outputColumnNames: _col0, _col1
@@ -76,13 +76,13 @@ STAGE PLANS:
                  className: VectorSelectOperator
                  native: true
                  projectedOutputColumnNums: [0, 1]
-               Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
                  File Sink Vectorization:
                    className: VectorFileSinkOperator
                    native: false
-                 Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -182,7 +182,7 @@ STAGE PLANS:
        Map Operator Tree:
          TableScan
            alias: test_1
-           Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
+           Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: COMPLETE
            TableScan Vectorization:
              native: true
            Select Operator
@@ -193,13 +193,13 @@ STAGE PLANS:
              native: true
              projectedOutputColumnNums: [0, 1, 4]
              selectExpressions: StructColumnInList(structExpressions [col 0:string, col 1:string], fieldVectorColumnTypes [BYTES, BYTES], structColumnMap [0, 1]) -> 4:boolean
-           Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
+           Statistics: Num rows: 2 Data size: 354 Basic stats: COMPLETE Column stats: COMPLETE
            File Output Operator
              compressed: false
              File Sink Vectorization:
                className: VectorFileSinkOperator
                native: false
-             Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 2 Data size: 354 Basic stats: COMPLETE Column stats: COMPLETE
              table:
                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -315,7 +315,7 @@ STAGE PLANS:
        Map Operator Tree:
          TableScan
            alias: test_2
-           Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+           Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
            TableScan Vectorization:
              native: true
            Filter Operator
@@ -324,7 +324,7 @@ STAGE PLANS:
                native: true
                predicateExpression: FilterStructColumnInList(structExpressions [col 0:int, col 1:int], fieldVectorColumnTypes [LONG, LONG], structColumnMap [0, 1])
              predicate: (struct(id,lineid)) IN (const struct(2,3), const struct(3,1), const struct(1,1), const struct(5,2), const struct(6,1), const struct(8,1), const struct(7,1), const struct(9,1), const struct(10,1)) (type: boolean)
-             Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
              Select Operator
                expressions: id (type: int), lineid (type: int)
                outputColumnNames: _col0, _col1
@@ -332,13 +332,13 @@ STAGE PLANS:
                  className: VectorSelectOperator
                  native: true
                  projectedOutputColumnNums: [0, 1]
-               Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
                  File Sink Vectorization:
                    className: VectorFileSinkOperator
                    native: false
-                 Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -438,7 +438,7 @@ STAGE PLANS:
        Map Operator Tree:
          TableScan
            alias: test_2
-           Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+           Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
            TableScan Vectorization:
              native: true
            Select Operator
@@ -449,13 +449,13 @@ STAGE PLANS:
              native: true
              projectedOutputColumnNums: [0, 1, 4]
              selectExpressions: StructColumnInList(structExpressions [col 0:int, col 1:int], fieldVectorColumnTypes [LONG, LONG], structColumnMap [0, 1]) -> 4:boolean
-           Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+           Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
            File Output Operator
              compressed: false
              File Sink Vectorization:
                className: VectorFileSinkOperator
                native: false
-             Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
              table:
                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -571,7 +571,7 @@ STAGE PLANS:
        Map Operator Tree:
          TableScan
            alias: test_3
-           Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
+           Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
            TableScan Vectorization:
              native: true
            Filter Operator
@@ -580,7 +580,7 @@ STAGE PLANS:
                native: true
                predicateExpression: FilterStructColumnInList(structExpressions [col 0:string, col 1:int], fieldVectorColumnTypes [BYTES, LONG], structColumnMap [0, 1])
              predicate: (struct(id,lineid)) IN (const struct('two',3), const struct('three',1), const struct('one',1), const struct('five',2), const struct('six',1), const struct('eight',1), const struct('seven',1), const struct('nine',1), const struct('ten',1)) (type: boolean)
-             Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
              Select Operator
                expressions: id (type: string), lineid (type: int)
                outputColumnNames: _col0, _col1
@@ -588,13 +588,13 @@ STAGE PLANS:
                  className: VectorSelectOperator
                  native: true
                  projectedOutputColumnNums: [0, 1]
-               Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
                  File Sink Vectorization:
                    className: VectorFileSinkOperator
                    native: false
-                 Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -694,7 +694,7 @@ STAGE PLANS:
        Map Operator Tree:
          TableScan
            alias: test_3
-           Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
+           Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
            TableScan Vectorization:
              native: true
            Select Operator
@@ -705,13 +705,13 @@ STAGE PLANS:
              native: true
              projectedOutputColumnNums: [0, 1, 4]
              selectExpressions: StructColumnInList(structExpressions [col 0:string, col 1:int], fieldVectorColumnTypes [BYTES, LONG], structColumnMap [0, 1]) -> 4:boolean
-           Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
+           Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE
            File Output Operator
              compressed: false
              File Sink Vectorization:
                className: VectorFileSinkOperator
                native: false
-             Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE
              table:
                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -830,7 +830,7 @@ STAGE PLANS:
        Map Operator Tree:
          TableScan
            alias: test_4
-           Statistics: Num rows: 3 Data size: 600 Basic stats: COMPLETE Column stats: NONE
+           Statistics: Num rows: 3 Data size: 303 Basic stats: COMPLETE Column stats: COMPLETE
            TableScan Vectorization:
              native: true
            Filter Operator
@@ -839,7 +839,7 @@ STAGE PLANS:
                native: true
                predicateExpression: FilterStructColumnInList(structExpressions [col 0:bigint, col 1:string, col 2:double], fieldVectorColumnTypes [LONG, BYTES, DOUBLE], structColumnMap [0, 1, 2])
              predicate: (struct(my_bigint,my_string,my_double)) IN (const struct(1,'a',1.5), const struct(1,'b',-0.5), const struct(3,'b',1.5), const struct(1,'d',1.5), const struct(1,'c',1.5), const struct(1,'b',2.5), const struct(1,'b',0.5), const struct(5,'b',1.5), const struct(1,'a',0.5), const struct(3,'b',1.5)) (type: boolean)
-             Statistics: Num rows: 3 Data size: 600 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 3 Data size: 303 Basic stats: COMPLETE Column stats: COMPLETE
              Select Operator
                expressions: my_bigint (type: bigint), my_string (type: string), my_double (type: double)
                outputColumnNames: _col0, _col1, _col2
@@ -847,13 +847,13 @@ STAGE PLANS:
                  className: VectorSelectOperator
                  native: true
                  projectedOutputColumnNums: [0, 1, 2]
-               Statistics: Num rows: 3 Data size: 600 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 3 Data size: 303 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
                  File Sink Vectorization:
                    className: VectorFileSinkOperator
                    native: false
-                 Statistics: Num rows: 3 Data size: 600 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 3 Data size: 303 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -956,7 +956,7 @@ STAGE PLANS:
        Map Operator Tree:
          TableScan
            alias: test_4
-           Statistics: Num rows: 3 Data size: 600 Basic stats: COMPLETE Column stats: NONE
+           Statistics: Num rows: 3 Data size: 303 Basic stats: COMPLETE Column stats: COMPLETE
            TableScan Vectorization:
              native: true
            Select Operator
@@ -967,13 +967,13 @@ STAGE PLANS:
              native: true
              projectedOutputColumnNums: [0, 1, 2, 5]
              selectExpressions: StructColumnInList(structExpressions [col 0:bigint, col 1:string, col 2:double], fieldVectorColumnTypes [LONG, BYTES, DOUBLE], structColumnMap [0, 1, 2]) -> 5:boolean
-           Statistics: Num rows: 3 Data size: 600 Basic stats: COMPLETE Column stats: NONE
+           Statistics: Num rows: 3 Data size: 315 Basic stats: COMPLETE Column stats: COMPLETE
            File Output Operator
              compressed: false
              File Sink Vectorization:
                className: VectorFileSinkOperator
                native: false
-             Statistics: Num rows: 3 Data size: 600 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 3 Data size: 315 Basic stats: COMPLETE Column stats: COMPLETE
              table:
                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
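A quick sanity check of the vector_struct_in numbers above (my arithmetic, not from the patch): under Column stats: COMPLETE the planner sizes rows from per-type widths rather than the raw-file heuristic, so for test_2 two rows of two int columns give 2 x (4 + 4) = 16 bytes, matching the unchanged "Data size: 16", and projecting the extra boolean IN-result appears to add 2 x 4 = 8 more, giving the 24 in the @@ -449 hunk. A paraphrase of the projection query (the exact text lives in vector_struct_in.q):

    EXPLAIN
    SELECT id, lineid,
           struct(id, lineid) IN (struct(1, 1), struct(3, 1)) AS matched
    FROM test_2;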
diff --git ql/src/test/results/clientpositive/llap/vector_udf1.q.out ql/src/test/results/clientpositive/llap/vector_udf1.q.out
index 89c2cbda65..296972379b 100644
--- ql/src/test/results/clientpositive/llap/vector_udf1.q.out
+++ ql/src/test/results/clientpositive/llap/vector_udf1.q.out
@@ -61,7 +61,7 @@ STAGE PLANS:
        Map Operator Tree:
          TableScan
            alias: varchar_udf_1
-           Statistics: Num rows: 1 Data size: 566 Basic stats: COMPLETE Column stats: NONE
+           Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE
            TableScan Vectorization:
              native: true
              vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct]
@@ -73,19 +73,19 @@ STAGE PLANS:
              native: true
              projectedOutputColumnNums: [9, 10, 14]
              selectExpressions: StringGroupConcatColCol(col 0:string, col 1:string) -> 9:string, StringGroupConcatColCol(col 2:varchar(10), col 3:varchar(20)) -> 10:varchar(30), StringGroupColEqualStringGroupColumn(col 11:string, col 13:string)(children: StringGroupConcatColCol(col 0:string, col 1:string) -> 11:string, CastStringGroupToString(col 12:varchar(30))(children: StringGroupConcatColCol(col 2:varchar(10), col 3:varchar(20)) -> 12:varchar(30)) -> 13:string) -> 14:boolean
-           Statistics: Num rows: 1 Data size: 566 Basic stats: COMPLETE Column stats: NONE
+           Statistics: Num rows: 1 Data size: 302 Basic stats: COMPLETE Column stats: COMPLETE
            Limit
              Number of rows: 1
              Limit Vectorization:
                className: VectorLimitOperator
                native: true
-             Statistics: Num rows: 1 Data size: 566 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 1 Data size: 302 Basic stats: COMPLETE Column stats: COMPLETE
            File Output Operator
              compressed: false
              File Sink Vectorization:
                className: VectorFileSinkOperator
                native: false
-             Statistics: Num rows: 1 Data size: 566 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 1 Data size: 302 Basic stats: COMPLETE Column stats: COMPLETE
              table:
                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -162,7 +162,7 @@ STAGE PLANS:
        Map Operator Tree:
          TableScan
            alias: varchar_udf_1
-           Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+           Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
            TableScan Vectorization:
              native: true
              vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct]
@@ -174,19 +174,19 @@ STAGE PLANS:
              native: true
              projectedOutputColumnNums: [9, 10, 14]
              selectExpressions: StringUpper(col 1:string) -> 9:string, StringUpper(col 3:varchar(20)) -> 10:varchar(20), StringGroupColEqualStringGroupColumn(col 11:string, col 13:string)(children: StringUpper(col 1:string) -> 11:string, CastStringGroupToString(col 12:varchar(20))(children: StringUpper(col 3:varchar(20)) -> 12:varchar(20)) -> 13:string) -> 14:boolean
-           Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+           Statistics: Num rows: 1 Data size: 292 Basic stats: COMPLETE Column stats: COMPLETE
            Limit
              Number of rows: 1
              Limit Vectorization:
                className: VectorLimitOperator
                native: true
-             Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 1 Data size: 292 Basic stats: COMPLETE Column stats: COMPLETE
            File Output Operator
              compressed: false
              File Sink Vectorization:
                className: VectorFileSinkOperator
                native: false
-             Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 1 Data size: 292 Basic stats: COMPLETE Column stats: COMPLETE
              table:
                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -263,7 +263,7 @@ STAGE PLANS:
        Map Operator Tree:
          TableScan
            alias: varchar_udf_1
-           Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+           Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
            TableScan Vectorization:
              native: true
              vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct]
@@ -275,19 +275,19 @@ STAGE PLANS:
              native: true
              projectedOutputColumnNums: [9, 10, 14]
              selectExpressions: StringLower(col 1:string) -> 9:string, StringLower(col 3:varchar(20)) -> 10:varchar(20), StringGroupColEqualStringGroupColumn(col 11:string, col 13:string)(children: StringLower(col 1:string) -> 11:string, CastStringGroupToString(col 12:varchar(20))(children: StringLower(col 3:varchar(20)) -> 12:varchar(20)) -> 13:string) -> 14:boolean
-           Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+           Statistics: Num rows: 1 Data size: 292 Basic stats: COMPLETE Column stats: COMPLETE
            Limit
              Number of rows: 1
              Limit Vectorization:
                className: VectorLimitOperator
                native: true
-             Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 1 Data size: 292 Basic stats: COMPLETE Column stats: COMPLETE
            File Output Operator
              compressed: false
              File Sink Vectorization:
                className: VectorFileSinkOperator
                native: false
-             Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 1 Data size: 292 Basic stats: COMPLETE Column stats: COMPLETE
              table:
                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -364,7 +364,7 @@ STAGE PLANS:
        Map Operator Tree:
          TableScan
            alias: varchar_udf_1
-           Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+           Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
            TableScan Vectorization:
              native: true
              vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct]
@@ -376,19 +376,19 @@ STAGE PLANS:
              native: true
              projectedOutputColumnNums: [9, 10, 13]
              selectExpressions: VectorUDFAdaptor(ascii(c2)) -> 9:int, VectorUDFAdaptor(ascii(c4)) -> 10:int, LongColEqualLongColumn(col 11:int, col 12:int)(children: VectorUDFAdaptor(ascii(c2)) -> 11:int, VectorUDFAdaptor(ascii(c4)) -> 12:int) -> 13:boolean
-           Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+           Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
            Limit
              Number of rows: 1
              Limit Vectorization:
                className: VectorLimitOperator
                native: true
-             Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
            File Output Operator
              compressed: false
              File Sink Vectorization:
                className: VectorFileSinkOperator
                native: false
-             Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
              table:
                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -465,7 +465,7 @@ STAGE PLANS:
        Map Operator Tree:
          TableScan
            alias: varchar_udf_1
-           Statistics: Num rows: 1 Data size: 566 Basic stats: COMPLETE Column stats: NONE
+           Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE
            TableScan Vectorization:
              native: true
              vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct]
@@ -477,19 +477,19 @@ STAGE PLANS:
              native: true
              projectedOutputColumnNums: [9, 10, 13]
              selectExpressions: VectorUDFAdaptor(concat_ws('|', c1, c2)) -> 9:string, VectorUDFAdaptor(concat_ws('|', c3, c4)) -> 10:string, StringGroupColEqualStringGroupColumn(col 11:string, col 12:string)(children: VectorUDFAdaptor(concat_ws('|', c1, c2)) -> 11:string, VectorUDFAdaptor(concat_ws('|', c3, c4)) -> 12:string) -> 13:boolean
-           Statistics: Num rows: 1 Data size: 566 Basic stats: COMPLETE Column stats: NONE
+           Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
            Limit
              Number of rows: 1
              Limit Vectorization:
                className: VectorLimitOperator
                native: true
-             Statistics: Num rows: 1 Data size: 566 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
            File Output Operator
              compressed: false
              File Sink Vectorization:
                className: VectorFileSinkOperator
                native: false
-             Statistics: Num rows: 1 Data size: 566 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
              table:
                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -566,7 +566,7 @@ STAGE PLANS:
        Map Operator Tree:
          TableScan
            alias: varchar_udf_1
-           Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+           Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
            TableScan Vectorization:
              native: true
              vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct]
@@ -578,19 +578,19 @@ STAGE PLANS:
              native: true
              projectedOutputColumnNums: [10, 11, 14]
              selectExpressions: VectorUDFAdaptor(decode(encode(c2,'US-ASCII'),'US-ASCII'))(children: VectorUDFAdaptor(encode(c2,'US-ASCII')) -> 9:binary) -> 10:string, VectorUDFAdaptor(decode(encode(c4,'US-ASCII'),'US-ASCII'))(children: VectorUDFAdaptor(encode(c4,'US-ASCII')) -> 9:binary) -> 11:string, StringGroupColEqualStringGroupColumn(col 12:string, col 13:string)(children: VectorUDFAdaptor(decode(encode(c2,'US-ASCII'),'US-ASCII'))(children: VectorUDFAdaptor(encode(c2,'US-ASCII')) -> 9:binary) -> 12:string, VectorUDFAdaptor(decode(encode(c4,'US-ASCII'),'US-ASCII'))(children: VectorUDFAdaptor(encode(c4,'US-ASCII')) -> 9:binary) -> 13:string) -> 14:boolean
-           Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+           Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
            Limit
              Number of rows: 1
              Limit Vectorization:
                className: VectorLimitOperator
                native: true
-             Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
            File Output Operator
              compressed: false
              File Sink Vectorization:
                className: VectorFileSinkOperator
                native: false
-             Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
              table:
                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -667,7 +667,7 @@ STAGE PLANS:
        Map Operator Tree:
          TableScan
            alias: varchar_udf_1
-           Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+           Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
            TableScan Vectorization:
              native: true
              vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct]
@@ -679,19 +679,19 @@ STAGE PLANS:
              native: true
              projectedOutputColumnNums: [9, 10, 13]
              selectExpressions: VectorUDFAdaptor(instr(c2, '_')) -> 9:int, VectorUDFAdaptor(instr(c4, '_')) -> 10:int, LongColEqualLongColumn(col 11:int, col 12:int)(children: VectorUDFAdaptor(instr(c2, '_')) -> 11:int, VectorUDFAdaptor(instr(c4, '_')) -> 12:int) -> 13:boolean
-           Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+           Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
            Limit
              Number of rows: 1
              Limit Vectorization:
                className: VectorLimitOperator
                native: true
-             Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
            File Output Operator
              compressed: false
              File Sink Vectorization:
                className: VectorFileSinkOperator
                native: false
-             Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
              table:
                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -768,7 +768,7 @@ STAGE PLANS:
        Map Operator Tree:
          TableScan
            alias: varchar_udf_1
-           Statistics: Num rows: 1 Data size: 566 Basic stats: COMPLETE Column stats: NONE
+           Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE
            TableScan Vectorization:
              native: true
              vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct]
@@ -780,19 +780,19 @@ STAGE PLANS:
              native: true
              projectedOutputColumnNums: [9, 10, 13]
              selectExpressions: VectorUDFAdaptor(replace(c1, '_', c2)) -> 9:string, VectorUDFAdaptor(replace(c3, '_', c4)) -> 10:string, StringGroupColEqualStringGroupColumn(col 11:string, col 12:string)(children: VectorUDFAdaptor(replace(c1, '_', c2)) -> 11:string, VectorUDFAdaptor(replace(c3, '_', c4)) -> 12:string) -> 13:boolean
-           Statistics: Num rows: 1 Data size: 566 Basic stats: COMPLETE Column stats: NONE
+           Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
            Limit
              Number of rows: 1
              Limit Vectorization:
                className: VectorLimitOperator
                native: true
-             Statistics: Num rows: 1 Data size: 566 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
            File Output Operator
              compressed: false
              File Sink Vectorization:
                className: VectorFileSinkOperator
                native: false
-             Statistics: Num rows: 1 Data size: 566 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
              table:
                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -869,7 +869,7 @@ STAGE PLANS:
        Map Operator Tree:
          TableScan
            alias: varchar_udf_1
-           Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+           Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
            TableScan Vectorization:
              native: true
              vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct]
@@ -881,19 +881,19 @@ STAGE PLANS:
              native: true
              projectedOutputColumnNums: [9, 10, 13]
              selectExpressions: VectorUDFAdaptor(reverse(c2)) -> 9:string, VectorUDFAdaptor(reverse(c4)) -> 10:string, StringGroupColEqualStringGroupColumn(col 11:string, col 12:string)(children: VectorUDFAdaptor(reverse(c2)) -> 11:string, VectorUDFAdaptor(reverse(c4)) -> 12:string) -> 13:boolean
-           Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+           Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
            Limit
              Number of rows: 1
              Limit Vectorization:
                className: VectorLimitOperator
                native: true
-             Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
            File Output Operator
              compressed: false
              File Sink Vectorization:
                className: VectorFileSinkOperator
                native: false
-             Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
              table:
                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -970,7 +970,7 @@ STAGE PLANS:
        Map Operator Tree:
          TableScan
            alias: varchar_udf_1
-           Statistics: Num rows: 1 Data size: 278 Basic stats: COMPLETE Column stats: NONE
+           Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE
            TableScan Vectorization:
              native: true
              vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct]
@@ -982,19 +982,19 @@ STAGE PLANS:
              native: true
              projectedOutputColumnNums: [9, 10, 13]
selectExpressions: VectorUDFAdaptor(next_day(d1, 'TU')) -> 9:string, VectorUDFAdaptor(next_day(d4, 'WE')) -> 10:string, StringGroupColEqualStringGroupColumn(col 11:string, col 12:string)(children: VectorUDFAdaptor(next_day(d1, 'TU')) -> 11:string, VectorUDFAdaptor(next_day(d4, 'WE')) -> 12:string) -> 13:boolean - Statistics: Num rows: 1 Data size: 278 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 1 Data size: 278 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 278 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1071,7 +1071,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 556 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct] @@ -1083,19 +1083,19 @@ STAGE PLANS: native: true projectedOutputColumnNums: [9, 10, 13] selectExpressions: VectorUDFAdaptor(months_between(d1, d3)) -> 9:double, VectorUDFAdaptor(months_between(d2, d4)) -> 10:double, DoubleColEqualDoubleColumn(col 11:double, col 12:double)(children: VectorUDFAdaptor(months_between(d1, d3)) -> 11:double, VectorUDFAdaptor(months_between(d2, d4)) -> 12:double) -> 13:boolean - Statistics: Num rows: 1 Data size: 556 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 1 Data size: 556 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 556 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1172,7 +1172,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct] @@ -1184,19 +1184,19 @@ STAGE PLANS: native: true projectedOutputColumnNums: [9, 10, 13] selectExpressions: StringLength(col 1:string) -> 9:int, StringLength(col 3:varchar(20)) -> 10:int, LongColEqualLongColumn(col 
selectExpressions: StringLength(col 1:string) -> 9:int, StringLength(col 3:varchar(20)) -> 10:int, LongColEqualLongColumn(col 11:int, col 12:int)(children: StringLength(col 1:string) -> 11:int, StringLength(col 3:varchar(20)) -> 12:int) -> 13:boolean
- Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 1
Limit Vectorization:
className: VectorLimitOperator
native: true
- Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1374,7 +1374,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: varchar_udf_1
- Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct]
@@ -1386,19 +1386,19 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [9, 10, 13]
selectExpressions: VectorUDFAdaptor(lpad(c2, 15, ' ')) -> 9:string, VectorUDFAdaptor(lpad(c4, 15, ' ')) -> 10:string, StringGroupColEqualStringGroupColumn(col 11:string, col 12:string)(children: VectorUDFAdaptor(lpad(c2, 15, ' ')) -> 11:string, VectorUDFAdaptor(lpad(c4, 15, ' ')) -> 12:string) -> 13:boolean
- Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 1
Limit Vectorization:
className: VectorLimitOperator
native: true
- Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1475,7 +1475,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: varchar_udf_1
- Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct]
@@ -1487,19 +1487,19 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [9, 10, 13]
selectExpressions: StringLTrim(col 1:string) -> 9:string, StringLTrim(col 3:varchar(20)) -> 10:string, StringGroupColEqualStringGroupColumn(col 11:string, col 12:string)(children: StringLTrim(col 1:string) -> 11:string, StringLTrim(col 3:varchar(20)) -> 12:string) -> 13:boolean
- Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 1
Limit Vectorization:
className: VectorLimitOperator
native: true
- Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1576,7 +1576,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: varchar_udf_1
- Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct]
@@ -1588,19 +1588,19 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [9, 10, 13]
selectExpressions: VectorUDFAdaptor(c2 regexp 'val') -> 9:boolean, VectorUDFAdaptor(c4 regexp 'val') -> 10:boolean, LongColEqualLongColumn(col 11:boolean, col 12:boolean)(children: VectorUDFAdaptor(c2 regexp 'val') -> 11:boolean, VectorUDFAdaptor(c4 regexp 'val') -> 12:boolean) -> 13:boolean
- Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 1
Limit Vectorization:
className: VectorLimitOperator
native: true
- Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1677,7 +1677,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: varchar_udf_1
- Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct]
@@ -1689,19 +1689,19 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [9, 10, 13]
selectExpressions: VectorUDFAdaptor(regexp_extract(c2, 'val_([0-9]+)', 1)) -> 9:string, VectorUDFAdaptor(regexp_extract(c4, 'val_([0-9]+)', 1)) -> 10:string, StringGroupColEqualStringGroupColumn(col 11:string, col 12:string)(children: VectorUDFAdaptor(regexp_extract(c2, 'val_([0-9]+)', 1)) -> 11:string, VectorUDFAdaptor(regexp_extract(c4, 'val_([0-9]+)', 1)) -> 12:string) -> 13:boolean
- Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 1
Limit Vectorization:
className: VectorLimitOperator
native: true
- Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1778,7 +1778,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: varchar_udf_1
- Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct]
@@ -1790,19 +1790,19 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [9, 10, 13]
selectExpressions: VectorUDFAdaptor(regexp_replace(c2, 'val', 'replaced')) -> 9:string, VectorUDFAdaptor(regexp_replace(c4, 'val', 'replaced')) -> 10:string, StringGroupColEqualStringGroupColumn(col 11:string, col 12:string)(children: VectorUDFAdaptor(regexp_replace(c2, 'val', 'replaced')) -> 11:string, VectorUDFAdaptor(regexp_replace(c4, 'val', 'replaced')) -> 12:string) -> 13:boolean
- Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 1
Limit Vectorization:
className: VectorLimitOperator
native: true
- Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1879,7 +1879,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: varchar_udf_1
- Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct]
@@ -1891,19 +1891,19 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [9, 10, 13]
selectExpressions: VectorUDFAdaptor(reverse(c2)) -> 9:string, VectorUDFAdaptor(reverse(c4)) -> 10:string, StringGroupColEqualStringGroupColumn(col 11:string, col 12:string)(children: VectorUDFAdaptor(reverse(c2)) -> 11:string, VectorUDFAdaptor(reverse(c4)) -> 12:string) -> 13:boolean
- Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 1
Limit Vectorization:
className: VectorLimitOperator
native: true
- Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1980,7 +1980,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: varchar_udf_1
- Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct]
@@ -1992,19 +1992,19 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [9, 10, 13]
selectExpressions: VectorUDFAdaptor(rpad(c2, 15, ' ')) -> 9:string, VectorUDFAdaptor(rpad(c4, 15, ' ')) -> 10:string, StringGroupColEqualStringGroupColumn(col 11:string, col 12:string)(children: VectorUDFAdaptor(rpad(c2, 15, ' ')) -> 11:string, VectorUDFAdaptor(rpad(c4, 15, ' ')) -> 12:string) -> 13:boolean
- Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 1
Limit Vectorization:
className: VectorLimitOperator
native: true
- Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -2081,7 +2081,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: varchar_udf_1
- Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct]
@@ -2093,19 +2093,19 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [9, 10, 13]
selectExpressions: StringRTrim(col 1:string) -> 9:string, StringRTrim(col 3:varchar(20)) -> 10:string, StringGroupColEqualStringGroupColumn(col 11:string, col 12:string)(children: StringRTrim(col 1:string) -> 11:string, StringRTrim(col 3:varchar(20)) -> 12:string) -> 13:boolean
- Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 1
Limit Vectorization:
className: VectorLimitOperator
native: true
- Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -2277,7 +2277,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: varchar_udf_1
- Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct]
@@ -2289,19 +2289,19 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [9, 10]
selectExpressions: VectorUDFAdaptor(split(c2, '_')) -> 9:array, VectorUDFAdaptor(split(c4, '_')) -> 10:array
- Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 3840 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 1
Limit Vectorization:
className: VectorLimitOperator
native: true
- Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 3840 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 3840 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -2473,7 +2473,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: varchar_udf_1
- Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct]
@@ -2485,19 +2485,19 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [9, 10, 13]
selectExpressions: StringSubstrColStartLen(col 1:string, start 0, length 3) -> 9:string, StringSubstrColStartLen(col 3:varchar(20), start 0, length 3) -> 10:string, StringGroupColEqualStringGroupColumn(col 11:string, col 12:string)(children: StringSubstrColStartLen(col 1:string, start 0, length 3) -> 11:string, StringSubstrColStartLen(col 3:varchar(20), start 0, length 3) -> 12:string) -> 13:boolean
- Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 1
Limit Vectorization:
className: VectorLimitOperator
native: true
- Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -2574,7 +2574,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: varchar_udf_1
- Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct]
@@ -2586,19 +2586,19 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [9, 10, 13]
selectExpressions: StringTrim(col 1:string) -> 9:string, StringTrim(col 3:varchar(20)) -> 10:string, StringGroupColEqualStringGroupColumn(col 11:string, col 12:string)(children: StringTrim(col 1:string) -> 11:string, StringTrim(col 3:varchar(20)) -> 12:string) -> 13:boolean
- Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 1
Limit Vectorization:
className: VectorLimitOperator
native: true
- Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -2676,19 +2676,19 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: varchar_udf_1
- Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: c2 (type: string), c4 (type: varchar(20))
outputColumnNames: _col0, _col2
- Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: compute_stats(_col0, 16), compute_stats(_col2, 16)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 1168 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 1168 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: struct), _col1 (type: struct)
Execution mode: llap
LLAP IO: all inputs
@@ -2710,10 +2710,10 @@ STAGE PLANS:
aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 1168 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 1168 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -2772,7 +2772,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: varchar_udf_1
- Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct]
@@ -2783,7 +2783,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [1, 3]
- Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: min(c2), min(c4)
Group By Vectorization:
@@ -2795,7 +2795,7 @@ STAGE PLANS:
projectedOutputColumnNums: [0, 1]
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 576 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
sort order:
Reduce Sink Vectorization:
@@ -2804,7 +2804,7 @@ STAGE PLANS:
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
valueColumnNums: [0, 1]
- Statistics: Num rows: 1 Data size: 576 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: string), _col1 (type: varchar(20))
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -2850,13 +2850,13 @@ STAGE PLANS:
projectedOutputColumnNums: [0, 1]
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 576 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 1 Data size: 576 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -2915,7 +2915,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: varchar_udf_1
- Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct]
@@ -2926,7 +2926,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [1, 3]
- Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: max(c2), max(c4)
Group By Vectorization:
@@ -2938,7 +2938,7 @@ STAGE PLANS:
projectedOutputColumnNums: [0, 1]
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 576 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
sort order:
Reduce Sink Vectorization:
@@ -2947,7 +2947,7 @@ STAGE PLANS:
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
valueColumnNums: [0, 1]
- Statistics: Num rows: 1 Data size: 576 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: string), _col1 (type: varchar(20))
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -2993,13 +2993,13 @@ STAGE PLANS:
projectedOutputColumnNums: [0, 1]
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 576 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 1 Data size: 576 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out
index 76fae5c20c..181ab50408 100644
--- ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out
+++ ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out
@@ -40,7 +40,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: count_case_groupby
- Statistics: Num rows: 5 Data size: 940 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
Select Operator
@@ -51,7 +51,7 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [0, 6]
selectExpressions: IfExprLongScalarLongColumn(col 1:boolean, val 1, col 5:int)(children: IfExprColumnNull(col 3:boolean, col 4:int, null)(children: NotCol(col 1:boolean) -> 3:boolean, ConstantVectorExpression(val 0) -> 4:int) -> 5:int) -> 6:int
- Statistics: Num rows: 5 Data size: 940 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count(_col1)
Group By Vectorization:
@@ -65,7 +65,7 @@ STAGE PLANS:
keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 5 Data size: 940 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
@@ -74,7 +74,7 @@ STAGE PLANS:
className: VectorReduceSinkStringOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 5 Data size: 940 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -109,13 +109,13 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/llap/vector_windowing.q.out ql/src/test/results/clientpositive/llap/vector_windowing.q.out
index 776ae3e085..db31b289e5 100644
--- ql/src/test/results/clientpositive/llap/vector_windowing.q.out
+++ ql/src/test/results/clientpositive/llap/vector_windowing.q.out
@@ -5300,12 +5300,15 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
+ Reducer 10 <- Reducer 9 (CUSTOM_SIMPLE_EDGE)
Reducer 2 <- Map 1 (SIMPLE_EDGE)
- Reducer 3 <- Map 1 (SIMPLE_EDGE)
- Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+ Reducer 4 <- Map 1 (SIMPLE_EDGE)
Reducer 5 <- Reducer 4 (SIMPLE_EDGE)
- Reducer 6 <- Map 1 (SIMPLE_EDGE)
- Reducer 7 <- Reducer 6 (SIMPLE_EDGE)
+ Reducer 6 <- Reducer 5 (SIMPLE_EDGE)
+ Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE)
+ Reducer 8 <- Map 1 (SIMPLE_EDGE)
+ Reducer 9 <- Reducer 8 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -5373,6 +5376,26 @@ STAGE PLANS:
dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
partitionColumnCount: 0
scratchColumnTypeNames: []
+ Reducer 10
+ Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported
+ vectorized: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Reducer 2
Execution mode: llap
Reduce Vectorization:
@@ -5431,11 +5454,44 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.part_1
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: double)
+ outputColumnNames: p_mfgr, p_name, p_size, r, dr, s
+ Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: compute_stats(p_mfgr, 'hll'), compute_stats(p_name, 'hll'), compute_stats(p_size, 'hll'), compute_stats(r, 'hll'), compute_stats(dr, 'hll'), compute_stats(s, 'hll')
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 1 Data size: 2576 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 2576 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct)
Reducer 3
Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported
+ vectorized: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 4
+ Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
notVectorizedReason: PTF operator: cume_dist not in supported functions [avg, count, dense_rank, first_value, last_value, max, min, rank, row_number, sum]
vectorized: false
Reduce Operator Tree:
@@ -5488,7 +5544,7 @@ STAGE PLANS:
Map-reduce partition columns: _col2 (type: string)
Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: rank_window_0 (type: int), dense_rank_window_1 (type: int), cume_dist_window_2 (type: double), _col1 (type: string)
- Reducer 4
+ Reducer 5
Execution mode: llap
Reduce Vectorization:
enabled: true
@@ -5530,7 +5586,7 @@ STAGE PLANS:
Map-reduce partition columns: _col5 (type: string)
Statistics: Num rows: 26 Data size: 13182 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: sum_window_3 (type: bigint), _col0 (type: int), _col1 (type: int), _col2 (type: double), _col8 (type: int)
- Reducer 5
+ Reducer 6
Execution mode: llap
Reduce Vectorization:
enabled: true
@@ -5574,7 +5630,40 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.part_2
- Reducer 6
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: double), _col7 (type: int)
+ outputColumnNames: p_mfgr, p_name, p_size, r, dr, cud, s2, fv1
+ Statistics: Num rows: 26 Data size: 6422 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: compute_stats(p_mfgr, 'hll'), compute_stats(p_name, 'hll'), compute_stats(p_size, 'hll'), compute_stats(r, 'hll'), compute_stats(dr, 'hll'), compute_stats(cud, 'hll'), compute_stats(s2, 'hll'), compute_stats(fv1, 'hll')
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 1 Data size: 3424 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 3424 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct)
+ Reducer 7
+ Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported
+ vectorized: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6), compute_stats(VALUE._col7)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 1 Data size: 3520 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 3520 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 8
Execution mode: vectorized, llap
Reduce Vectorization:
enabled: true
@@ -5658,7 +5747,7 @@ STAGE PLANS:
valueColumnNums: [3, 4, 2]
Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: count_window_0 (type: bigint), count_window_1 (type: bigint), _col5 (type: int)
- Reducer 7
+ Reducer 9
Execution mode: llap
Reduce Vectorization:
enabled: true
@@ -5702,6 +5791,19 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.part_3
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int)
+ outputColumnNames: p_mfgr, p_name, p_size, c, ca, fv
+ Statistics: Num rows: 26 Data size: 6110 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: compute_stats(p_mfgr, 'hll'), compute_stats(p_name, 'hll'), compute_stats(p_size, 'hll'), compute_stats(c, 'hll'), compute_stats(ca, 'hll'), compute_stats(fv, 'hll')
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 1 Data size: 2576 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 2576 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct)
Stage: Stage-4
Dependency Collection
@@ -5719,6 +5821,10 @@ STAGE PLANS:
Stage: Stage-5
Stats Work
Basic Stats Work:
+ Column Stats Desc:
+ Columns: p_mfgr, p_name, p_size, r, dr, s
+ Column Types: string, string, int, int, int, double
+ Table: default.part_1
Stage: Stage-1
Move Operator
@@ -5733,6 +5839,10 @@ STAGE PLANS:
Stage: Stage-6
Stats Work
Basic Stats Work:
+ Column Stats Desc:
+ Columns: p_mfgr, p_name, p_size, r, dr, cud, s2, fv1
+ Column Types: string, string, int, int, int, int, double, int
+ Table: default.part_2
Stage: Stage-2
Move Operator
@@ -5747,6 +5857,10 @@ STAGE PLANS:
Stage: Stage-7
Stats Work
Basic Stats Work:
+ Column Stats Desc:
+ Columns: p_mfgr, p_name, p_size, c, ca, fv
+ Column Types: string, string, int, int, int, int
+ Table: default.part_3
PREHOOK: query: from part
INSERT OVERWRITE TABLE part_1
diff --git ql/src/test/results/clientpositive/llap/vector_windowing_windowspec4.q.out ql/src/test/results/clientpositive/llap/vector_windowing_windowspec4.q.out
index 6672a737db..2e6f1d1f2e 100644
--- ql/src/test/results/clientpositive/llap/vector_windowing_windowspec4.q.out
+++ ql/src/test/results/clientpositive/llap/vector_windowing_windowspec4.q.out
@@ -68,7 +68,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: smalltable_windowing
- Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 267 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:i:int, 1:type:string, 2:ROW__ID:struct]
@@ -83,7 +83,7 @@ STAGE PLANS:
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
partitionColumnNums: [1]
valueColumnNums: []
- Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 267 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: vectorized, llap
LLAP IO: no inputs
Map Vectorization:
@@ -113,7 +113,7 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey1 (type: int), KEY.reducesinkkey0 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 1071 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
@@ -175,14 +175,14 @@ STAGE PLANS:
name: count
window function: GenericUDAFCountEvaluator
window frame: ROWS PRECEDING(1)~FOLLOWING(7)
- Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 1071 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col1 (type: string), _col0 (type: int), max_window_0 (type: int), min_window_1 (type: int), first_value_window_2 (type: int), last_value_window_3 (type: int), avg_window_4 (type: double), sum_window_5 (type: bigint), collect_set_window_6 (type: array), count_window_7 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
- Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 747 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 747 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/llap/vectorization_input_format_excludes.q.out ql/src/test/results/clientpositive/llap/vectorization_input_format_excludes.q.out
index f1b2d5d79b..ab8752a8d2 100644
--- ql/src/test/results/clientpositive/llap/vectorization_input_format_excludes.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_input_format_excludes.q.out
@@ -73,20 +73,20 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypes_parquet
- Statistics: Num rows: 12288 Data size: 2428400 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 1082638 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (cint = 528534767) (type: boolean)
- Statistics: Num rows: 5 Data size: 988 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 236 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: 528534767 (type: int), ctinyint (type: tinyint), csmallint (type: smallint), cfloat (type: float), cdouble (type: double), cstring1 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 5 Data size: 988 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 236 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 10
- Statistics: Num rows: 5 Data size: 988 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 236 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 5 Data size: 988 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 236 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -171,22 +171,22 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypes_parquet
- Statistics: Num rows: 12288 Data size: 2428400 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 1082638 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: cint (type: int), ctinyint (type: tinyint), csmallint (type: smallint), cfloat (type: float), cdouble (type: double), cstring1 (type: string)
outputColumnNames: cint, ctinyint, csmallint, cfloat, cdouble, cstring1
- Statistics: Num rows: 12288 Data size: 2428400 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 1082638 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: max(cint), min(csmallint), count(cstring1), avg(cfloat), stddev_pop(cdouble)
keys: ctinyint (type: tinyint)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 12288 Data size: 2428400 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 128 Data size: 22404 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: tinyint)
sort order: +
Map-reduce partition columns: _col0 (type: tinyint)
- Statistics: Num rows: 12288 Data size: 2428400 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 128 Data size: 22404 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: struct), _col5 (type: struct)
Execution mode: vectorized, llap
LLAP IO: all inputs (cache only)
@@ -213,10 +213,10 @@ STAGE PLANS:
keys: KEY._col0 (type: tinyint)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 6144 Data size: 1214200 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 128 Data size: 4484 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 6144 Data size: 1214200 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 128 Data size: 4484 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -405,20 +405,20 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypes_parquet
- Statistics: Num rows: 12288 Data size: 2428400 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 1082638 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (cint = 528534767) (type: boolean)
- Statistics: Num rows: 5 Data size: 988 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 236 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: 528534767 (type: int), ctinyint (type: tinyint), csmallint (type: smallint), cfloat (type: float), cdouble (type: double), cstring1 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 5 Data size: 988 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 236 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 10
- Statistics: Num rows: 5 Data size: 988 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 236 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 5 Data size: 988 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 236 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -498,22 +498,22 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypes_parquet
- Statistics: Num rows: 12288 Data size: 2428400 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 1082638 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: cint (type: int), ctinyint (type: tinyint), csmallint (type: smallint), cfloat (type: float), cdouble (type: double), cstring1 (type: string)
outputColumnNames: cint, ctinyint, csmallint, cfloat, cdouble, cstring1
- Statistics: Num rows: 12288 Data size: 2428400 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 1082638 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: max(cint), min(csmallint), count(cstring1), avg(cfloat), stddev_pop(cdouble)
keys: ctinyint (type: tinyint)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 12288 Data size: 2428400 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 128 Data size: 22404 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: tinyint)
sort order: +
Map-reduce partition columns: _col0 (type: tinyint)
- Statistics: Num rows: 12288 Data size: 2428400 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 128 Data size: 22404 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: struct), _col5 (type: struct)
Execution mode: llap
LLAP IO: all inputs (cache only)
@@ -535,10 +535,10 @@ STAGE PLANS:
keys: KEY._col0 (type: tinyint)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 6144 Data size: 1214200 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 128 Data size: 4484 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 6144 Data size: 1214200 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 128 Data size: 4484 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -727,20 +727,20 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypes_parquet
- Statistics: Num rows: 12288 Data size: 2428400 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 1082638 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (cint = 528534767) (type: boolean)
- Statistics: Num rows: 5 Data size: 988 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 236 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: 528534767 (type: int), ctinyint (type: tinyint), csmallint (type: smallint), cfloat (type: float), cdouble (type: double), cstring1 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 5 Data size: 988 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 236 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 10
- Statistics: Num rows: 5 Data size: 988 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 236 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 5 Data size: 988 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 236 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -825,22 +825,22 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypes_parquet
- Statistics: Num rows: 12288 Data size: 2428400 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 1082638 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: cint (type: int), ctinyint (type: tinyint), csmallint (type: smallint), cfloat (type: float), cdouble (type: double), cstring1 (type: string)
outputColumnNames: cint, ctinyint, csmallint, cfloat, cdouble, cstring1
- Statistics: Num rows: 12288 Data size: 2428400 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 1082638 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: max(cint), min(csmallint), count(cstring1), avg(cfloat), stddev_pop(cdouble)
keys: ctinyint (type: tinyint)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 12288 Data size: 2428400 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 128 Data size: 22404 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: tinyint)
sort order: +
Map-reduce partition columns: _col0 (type: tinyint)
- Statistics: Num rows: 12288 Data size: 2428400 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 128 Data size: 22404 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: struct), _col5 (type: struct)
Execution mode: vectorized, llap
LLAP IO: all inputs (cache only)
@@ -867,10 +867,10 @@ STAGE PLANS:
keys: KEY._col0 (type: tinyint)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 6144 Data size: 1214200 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 128 Data size: 4484 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 6144 Data size: 1214200 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 128 Data size: 4484 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1107,20 +1107,20 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypes_orc
- Statistics: Num rows: 12288 Data size: 2428400 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 1082638 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (cint = 528534767) (type: boolean)
- Statistics: Num rows: 5 Data size: 988 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 236 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: 528534767 (type: int), ctinyint (type: tinyint), csmallint (type: smallint), cfloat (type: float), cdouble (type: double), cstring1 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 5 Data size: 988 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 236 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 10
- Statistics: Num rows: 5 Data size: 988 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 236 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 5 Data size: 988 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 236 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1200,22 +1200,22 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypes_orc
- Statistics: Num rows: 12288 Data size: 2428400 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 1082638 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: cint (type: int), ctinyint (type: tinyint), csmallint (type: smallint), cfloat (type: float), cdouble (type: double), cstring1 (type: string)
outputColumnNames: cint, ctinyint, csmallint, cfloat, cdouble, cstring1
- Statistics: Num rows: 12288 Data size: 2428400 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 1082638 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: max(cint), min(csmallint), count(cstring1), avg(cfloat), stddev_pop(cdouble)
keys: ctinyint (type: tinyint)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 12288 Data size: 2428400 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 128 Data size: 22404 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: tinyint)
sort order: +
 Map-reduce partition columns: _col0 (type: tinyint)
- Statistics: Num rows: 12288 Data size: 2428400 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 128 Data size: 22404 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: struct), _col5 (type: struct)
 Execution mode: llap
 LLAP IO: all inputs
@@ -1237,10 +1237,10 @@ STAGE PLANS:
 keys: KEY._col0 (type: tinyint)
 mode: mergepartial
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 6144 Data size: 1214200 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 128 Data size: 4484 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 6144 Data size: 1214200 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 128 Data size: 4484 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/llap/vectorization_part_project.q.out ql/src/test/results/clientpositive/llap/vectorization_part_project.q.out
index 1b25b03ec1..e46c7f4524 100644
--- ql/src/test/results/clientpositive/llap/vectorization_part_project.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_part_project.q.out
@@ -70,15 +70,15 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: alltypesorc_part
- Statistics: Num rows: 200 Data size: 56024 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 200 Data size: 1592 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: (cdouble + 2.0) (type: double)
 outputColumnNames: _col0
- Statistics: Num rows: 200 Data size: 56024 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 200 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: double)
 sort order: +
- Statistics: Num rows: 200 Data size: 56024 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 200 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE
 TopN Hash Memory Usage: 0.1
 Execution mode: vectorized, llap
 LLAP IO: all inputs
@@ -103,13 +103,13 @@ STAGE PLANS:
 Select Operator
 expressions: KEY.reducesinkkey0 (type: double)
 outputColumnNames: _col0
- Statistics: Num rows: 200 Data size: 56024 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 200 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE
 Limit
 Number of rows: 10
- Statistics: Num rows: 10 Data size: 2800 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 10 Data size: 2800 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out
index 1d9abb015a..61c5051bb9 100644
--- ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out
+++ ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out
@@ -120,7 +120,7 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: a
- Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE
 TableScan Vectorization:
 native: true
 Filter Operator
@@ -129,7 +129,7 @@ STAGE PLANS:
 native: true
 predicateExpression: SelectColumnIsNotNull(col 0:int)
 predicate: key is not null (type: boolean)
- Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: key (type: int)
 sort order: +
@@ -138,7 +138,7 @@ STAGE PLANS:
 className: VectorReduceSinkLongOperator
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: value (type: string)
 Execution mode: vectorized, llap
 LLAP IO: all inputs
@@ -155,7 +155,7 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: b
- Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE
 TableScan Vectorization:
 native: true
 Filter Operator
@@ -164,7 +164,7 @@ STAGE PLANS:
 native: true
 predicateExpression: SelectColumnIsNotNull(col 0:int)
 predicate: key is not null (type: boolean)
- Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: key (type: int)
 sort order: +
@@ -173,7 +173,7 @@ STAGE PLANS:
 className: VectorReduceSinkLongOperator
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: value (type: string)
 Execution mode: vectorized, llap
 LLAP IO: all inputs
@@ -196,14 +196,14 @@ STAGE PLANS:
 0 key (type: int)
 1 key (type: int)
 outputColumnNames: _col0, _col1, _col5, _col6
- Statistics: Num rows: 2 Data size: 413 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 832 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string)
 outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 2 Data size: 413 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 832 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 2 Data size: 413 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 832 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -255,7 +255,7 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: a
- Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE
 TableScan Vectorization:
 native: true
 Filter Operator
@@ -264,7 +264,7 @@ STAGE PLANS:
 native: true
 predicateExpression: SelectColumnIsNotNull(col 0:int)
 predicate: key is not null (type: boolean)
- Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: key (type: int)
 sort order: +
@@ -273,7 +273,7 @@ STAGE PLANS:
 className: VectorReduceSinkLongOperator
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: value (type: string)
 Execution mode: vectorized, llap
 LLAP IO: all inputs
@@ -290,7 +290,7 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: b
- Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE
 TableScan Vectorization:
 native: true
 Filter Operator
@@ -299,7 +299,7 @@ STAGE PLANS:
 native: true
 predicateExpression: SelectColumnIsNotNull(col 0:int)
 predicate: key is not null (type: boolean)
- Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: key (type: int)
 sort order: +
@@ -308,7 +308,7 @@ STAGE PLANS:
 className: VectorReduceSinkLongOperator
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: value (type: string)
 Execution mode: vectorized, llap
 LLAP IO: no inputs
@@ -331,14 +331,14 @@ STAGE PLANS:
 0 key (type: int)
 1 key (type: int)
 outputColumnNames: _col0, _col1, _col5, _col6
- Statistics: Num rows: 2 Data size: 413 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 832 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string)
 outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 2 Data size: 413 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 832 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 2 Data size: 413 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 832 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -390,7 +390,7 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: a
- Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE
 TableScan Vectorization:
 native: true
 Filter Operator
@@ -399,7 +399,7 @@ STAGE PLANS:
 native: true
 predicateExpression: SelectColumnIsNotNull(col 0:int)
 predicate: key is not null (type: boolean)
- Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: key (type: int)
 sort order: +
@@ -408,7 +408,7 @@ STAGE PLANS:
 className: VectorReduceSinkLongOperator
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: value (type: string)
 Execution mode: vectorized, llap
 LLAP IO: all inputs
@@ -425,7 +425,7 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: b
- Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE
 TableScan Vectorization:
 native: true
 Filter Operator
@@ -434,7 +434,7 @@ STAGE PLANS:
 native: true
 predicateExpression: SelectColumnIsNotNull(col 0:int)
 predicate: key is not null (type: boolean)
- Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: key (type: int)
 sort order: +
@@ -443,7 +443,7 @@ STAGE PLANS:
 className: VectorReduceSinkLongOperator
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: value (type: string)
 Execution mode: vectorized, llap
 LLAP IO: no inputs
@@ -467,14 +467,14 @@ STAGE PLANS:
 0 key (type: int)
 1 key (type: int)
 outputColumnNames: _col0, _col1, _col5, _col6
- Statistics: Num rows: 2 Data size: 413 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 832 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string)
 outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 2 Data size: 413 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 832 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 2 Data size: 413 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 832 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/llap/vectorized_case.q.out ql/src/test/results/clientpositive/llap/vectorized_case.q.out
index 97e2093ab6..d2de89c361 100644
--- ql/src/test/results/clientpositive/llap/vectorized_case.q.out
+++ ql/src/test/results/clientpositive/llap/vectorized_case.q.out
@@ -531,14 +531,14 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: test_1
- Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE ((attr + 2)) END (type: decimal(11,0))
 outputColumnNames: _col0
- Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -582,14 +582,14 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: test_1
- Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: CASE WHEN ((member = 1)) THEN (1) ELSE ((attr + 2)) END (type: decimal(11,0))
 outputColumnNames: _col0
- Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -633,14 +633,14 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: test_1
- Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE (2) END (type: decimal(11,0))
 outputColumnNames: _col0
- Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -700,14 +700,14 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: test_2
- Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE ((attr + 2)) END (type: bigint)
 outputColumnNames: _col0
- Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -751,14 +751,14 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: test_2
- Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: CASE WHEN ((member = 1)) THEN (null) ELSE ((attr + 2)) END (type: bigint)
 outputColumnNames: _col0
- Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -802,14 +802,14 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: test_2
- Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE (null) END (type: bigint)
 outputColumnNames: _col0
- Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/llap/vectorized_context.q.out ql/src/test/results/clientpositive/llap/vectorized_context.q.out
index 8d2002eb7d..8907c7f809 100644
--- ql/src/test/results/clientpositive/llap/vectorized_context.q.out
+++ ql/src/test/results/clientpositive/llap/vectorized_context.q.out
@@ -115,45 +115,45 @@ STAGE PLANS:
 Map 1
 Map Operator Tree:
 TableScan
- alias: household_demographics
- Statistics: Num rows: 6075 Data size: 23092 Basic stats: COMPLETE Column stats: NONE
+ alias: store_sales
+ Statistics: Num rows: 6075 Data size: 72744 Basic stats: COMPLETE Column stats: COMPLETE
 Filter Operator
- predicate: hd_demo_sk is not null (type: boolean)
- Statistics: Num rows: 5772 Data size: 21940 Basic stats: COMPLETE Column stats: NONE
+ predicate: (ss_hdemo_sk is not null and ss_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 6075 Data size: 72744 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
- expressions: hd_demo_sk (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 5772 Data size: 21940 Basic stats: COMPLETE Column stats: NONE
+ expressions: ss_store_sk (type: int), ss_hdemo_sk (type: int), ss_net_profit (type: double)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 6075 Data size: 72744 Basic stats: COMPLETE Column stats: COMPLETE
 Map Join Operator
 condition map:
 Inner Join 0 to 1
 keys:
- 0 _col0 (type: int)
- 1 _col1 (type: int)
- outputColumnNames: _col1, _col3
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col2
 input vertices:
 1 Map 2
- Statistics: Num rows: 6349 Data size: 24134 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6124 Data size: 49032 Basic stats: COMPLETE Column stats: COMPLETE
 Map Join Operator
 condition map:
 Inner Join 0 to 1
 keys:
- 0 _col1 (type: int)
+ 0 _col0 (type: int)
 1 _col0 (type: int)
- outputColumnNames: _col3, _col5
+ outputColumnNames: _col2, _col5
 input vertices:
 1 Map 3
- Statistics: Num rows: 6983 Data size: 26547 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6173 Data size: 625962 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
- expressions: _col5 (type: string), _col3 (type: double)
+ expressions: _col5 (type: string), _col2 (type: double)
 outputColumnNames: _col0, _col1
- Statistics: Num rows: 6983 Data size: 26547 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6173 Data size: 625962 Basic stats: COMPLETE Column stats: COMPLETE
 Limit
 Number of rows: 100
- Statistics: Num rows: 100 Data size: 300 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 100 Data size: 10208 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 100 Data size: 300 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 100 Data size: 10208 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -172,21 +172,20 @@ STAGE PLANS:
 Map 2
 Map Operator Tree:
 TableScan
- alias: store_sales
- Statistics: Num rows: 6075 Data size: 92368 Basic stats: COMPLETE Column stats: NONE
+ alias: household_demographics
+ Statistics: Num rows: 6075 Data size: 24300 Basic stats: COMPLETE Column stats: COMPLETE
 Filter Operator
- predicate: (ss_hdemo_sk is not null and ss_store_sk is not null) (type: boolean)
- Statistics: Num rows: 5469 Data size: 83154 Basic stats: COMPLETE Column stats: NONE
+ predicate: hd_demo_sk is not null (type: boolean)
+ Statistics: Num rows: 6075 Data size: 24300 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
- expressions: ss_store_sk (type: int), ss_hdemo_sk (type: int), ss_net_profit (type: double)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 5469 Data size: 83154 Basic stats: COMPLETE Column stats: NONE
+ expressions: hd_demo_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 6075 Data size: 24300 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
- key expressions: _col1 (type: int)
+ key expressions: _col0 (type: int)
 sort order: +
- Map-reduce partition columns: _col1 (type: int)
- Statistics: Num rows: 5469 Data size: 83154 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: int), _col2 (type: double)
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 6075 Data size: 24300 Basic stats: COMPLETE Column stats: COMPLETE
 Execution mode: vectorized, llap
 LLAP IO: all inputs
 Map Vectorization:
@@ -202,19 +201,19 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: store
- Statistics: Num rows: 6075 Data size: 1085324 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6075 Data size: 615730 Basic stats: COMPLETE Column stats: COMPLETE
 Filter Operator
 predicate: s_store_sk is not null (type: boolean)
- Statistics: Num rows: 5772 Data size: 1031191 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6075 Data size: 615730 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: s_store_sk (type: int), s_city (type: string)
 outputColumnNames: _col0, _col1
- Statistics: Num rows: 5772 Data size: 1031191 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6075 Data size: 615730 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: int)
 sort order: +
 Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 5772 Data size: 1031191 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6075 Data size: 615730 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col1 (type: string)
 Execution mode: vectorized, llap
 LLAP IO: all inputs
diff --git ql/src/test/results/clientpositive/llap/vectorized_join46.q.out ql/src/test/results/clientpositive/llap/vectorized_join46.q.out
index 428208f7f7..2ad36808b7 100644
--- ql/src/test/results/clientpositive/llap/vectorized_join46.q.out
+++ ql/src/test/results/clientpositive/llap/vectorized_join46.q.out
@@ -62,11 +62,11 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: test1
- Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: key (type: int), value (type: int), col_1 (type: string)
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
 Map Join Operator
 condition map:
 Left Outer Join 0 to 1
@@ -76,10 +76,10 @@ STAGE PLANS:
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
 input vertices:
 1 Map 2
- Statistics: Num rows: 6 Data size: 1267 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 1524 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 6 Data size: 1267 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 1524 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -90,16 +90,16 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: test2
- Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: key (type: int), value (type: int), col_2 (type: string)
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col1 (type: int)
 sort order: +
 Map-reduce partition columns: _col1 (type: int)
- Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: int), _col2 (type: string)
 Execution mode: vectorized, llap
 LLAP IO: no inputs
@@ -162,11 +162,11 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: test1
- Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: key (type: int), value (type: int), col_1 (type: string)
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
 Map Join Operator
 condition map:
 Left Outer Join 0 to 1
@@ -179,10 +179,10 @@ STAGE PLANS:
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
 input vertices:
 1 Map 2
- Statistics: Num rows: 6 Data size: 1267 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 857 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 6 Data size: 1267 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 857 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -193,19 +193,19 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: test2
- Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
 Filter Operator
 predicate: key BETWEEN 100 AND 102 (type: boolean)
- Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: key (type: int), value (type: int), col_2 (type: string)
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col1 (type: int)
 sort order: +
 Map-reduce partition columns: _col1 (type: int)
- Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: int), _col2 (type: string)
 Execution mode: vectorized, llap
 LLAP IO: no inputs
@@ -269,11 +269,11 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: test1
- Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: key (type: int), value (type: int), col_1 (type: string)
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
 Map Join Operator
 condition map:
 Left Outer Join 0 to 1
@@ -286,10 +286,10 @@ STAGE PLANS:
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
 input vertices:
 1 Map 2
- Statistics: Num rows: 6 Data size: 2310 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 1142 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 6 Data size: 2310 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 1142 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -300,17 +300,17 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: test2
- Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
 Filter Operator
 predicate: key BETWEEN 100 AND 102 (type: boolean)
- Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: key (type: int), value (type: int), col_2 (type: string)
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 sort order:
- Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string)
 Execution mode: vectorized, llap
 LLAP IO: no inputs
@@ -370,16 +370,16 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: test1
- Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: key (type: int), value (type: int), col_1 (type: string)
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col1 (type: int)
 sort order: +
 Map-reduce partition columns: _col1 (type: int)
- Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: int), _col2 (type: string)
 Execution mode: vectorized, llap
 LLAP IO: no inputs
@@ -387,11 +387,11 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: test2
- Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: key (type: int), value (type: int), col_2 (type: string)
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
 Map Join Operator
 condition map:
 Right Outer Join 0 to 1
@@ -401,10 +401,10 @@ STAGE PLANS:
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
 input vertices:
 0 Map 1
- Statistics: Num rows: 6 Data size: 1267 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 1524 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 6 Data size: 1267 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 1524 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -465,11 +465,11 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: test1
- Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: key (type: int), value (type: int), col_1 (type: string)
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
 Map Join Operator
 condition map:
 Left Outer Join 0 to 1
@@ -482,10 +482,10 @@ STAGE PLANS:
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
 input vertices:
 1 Map 2
- Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -496,14 +496,14 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: test2
- Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: key (type: int), value (type: int), col_2 (type: string)
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 sort order:
- Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string)
 Execution mode: vectorized, llap
 LLAP IO: no inputs
@@ -572,11 +572,11 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: test1
- Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: key (type: int), value (type: int), col_1 (type: string)
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
 Map Join Operator
 condition map:
 Left Outer Join 0 to 1
@@ -587,10 +587,10 @@ STAGE PLANS:
 input vertices:
 1 Map 2
 residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)}
- Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -601,14 +601,14 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: test2
- Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: key (type: int), value (type: int), col_2 (type: string)
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 sort order:
- Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string)
 Execution mode: vectorized, llap
 LLAP IO: no inputs
@@ -680,11 +680,11 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: test1
- Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: key (type: int), value (type: int), col_1 (type: string)
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
 Map Join Operator
 condition map:
 Left Outer Join 0 to 1
@@ -695,10 +695,10 @@ STAGE PLANS:
 input vertices:
 1 Map 2
 residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102)}
- Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -709,14 +709,14 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: test2
- Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: key (type: int), value (type: int), col_2 (type: string)
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 sort order:
- Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string)
 Execution mode: vectorized, llap
 LLAP IO: no inputs
@@ -786,11 +786,11 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: test1
- Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: key (type: int), value (type: int), col_1 (type: string)
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
 Map Join Operator
 condition map:
 Left Outer Join 0 to 1
@@ -801,10 +801,10 @@ STAGE PLANS:
 input vertices:
 1 Map 2
 residual filter predicates: {((_col1 = _col4) or _col3 BETWEEN 100 AND 102)}
- Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -815,14 +815,14 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: test2
- Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: key (type: int), value (type: int), col_2 (type: string)
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 sort order:
- Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string)
 Execution mode: vectorized, llap
 LLAP IO: no inputs
@@ -888,11 +888,11 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: test1
- Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: key (type: int), value (type: int), col_1 (type: string)
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
 Map Join Operator
 condition map:
 Left Outer Join 0 to 1
@@ -903,10 +903,10 @@ STAGE PLANS:
 input vertices:
 1 Map 2
 residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)}
- Statistics: Num rows: 6 Data size: 1267 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 1524 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 6 Data size: 1267 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 1524 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -917,16 +917,16 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: test2
- Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: key (type: int), value (type: int), col_2 (type: string)
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col1 (type: int)
 sort order: +
 Map-reduce partition columns: _col1 (type: int)
- Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: int), _col2 (type: string)
 Execution mode: vectorized, llap
 LLAP IO: no inputs
@@ -993,14 +993,14 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: test1
- Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: key (type: int), value (type: int), col_1 (type: string)
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 sort order:
- Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string)
 Execution mode: vectorized, llap
 LLAP IO: no inputs
@@ -1008,11 +1008,11 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: test2
- Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: key (type: int), value (type: int), col_2 (type: string)
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
 Map Join Operator
 condition map:
 Right Outer Join 0 to 1
@@ -1023,10 +1023,10 @@ STAGE PLANS:
 input vertices:
 0 Map 1
 residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)}
- Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1101,14 +1101,14 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: test1
- Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: key (type: int), value (type: int), col_1 (type: string)
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 sort order:
- Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string)
 Execution mode: vectorized, llap
 LLAP IO: no inputs
@@ -1116,11 +1116,11 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: test2
- Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: key (type: int), value (type: int), col_2 (type: string)
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
 Map Join Operator
 condition map:
 Right Outer Join 0 to 1
@@ -1131,10 +1131,10 @@ STAGE PLANS:
 input vertices:
 0 Map 1
 residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102)}
- Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1204,14 +1204,14 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: test1
- Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: key (type: int), value (type: int), col_1 (type: string)
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 sort order:
- Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string)
 Execution mode: vectorized, llap
 LLAP IO: no inputs
@@ -1219,11 +1219,11 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: test2
- Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: key (type: int), value (type: int), col_2 (type: string)
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
 Map Join Operator
 condition map:
 Right Outer Join 0 to 1
@@ -1234,10 +1234,10 @@ STAGE PLANS:
 input vertices:
 0 Map 1
 residual filter predicates: {((_col1 = _col4) or _col3 BETWEEN 100 AND 102)}
- Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1308,16 +1308,16 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: test1
- Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: key (type: int), value (type: int), col_1 (type: string)
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col1 (type: int)
 sort order: +
 Map-reduce partition columns: _col1 (type: int)
- Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: int), _col2 (type: string)
 Execution mode: vectorized, llap
 LLAP IO: no inputs
@@ -1325,11 +1325,11 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: test2
- Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: key (type: int), value (type: int), col_2 (type: string)
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
 Map Join Operator
 condition map:
 Right Outer Join 0 to 1
@@ -1340,10 +1340,10 @@ STAGE PLANS:
 input vertices:
 0 Map 1
 residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)}
- Statistics: Num rows: 6 Data size: 1267 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 1524 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 6 Data size: 1267 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 1524 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1411,14 +1411,14 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: test1
- Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: key (type: int), value (type: int), col_1 (type: string)
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 sort order:
- Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string)
 Execution mode: vectorized, llap
 LLAP IO: no inputs
@@ -1426,14 +1426,14 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: test2
- Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: key (type: int), value (type: int), col_2 (type: string)
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 sort order:
- Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string)
 Execution mode: vectorized, llap
 LLAP IO: no inputs
@@ -1448,10 +1448,10 @@ STAGE PLANS:
 1
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
 residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)}
- Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1524,14 +1524,14 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: test1
- Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: key (type: int), value (type: int), col_1 (type: string)
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 sort order:
- Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string)
 Execution mode: vectorized, llap
 LLAP IO: no inputs
@@ -1539,14 +1539,14 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: test2
- Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: key (type: int), value (type: int), col_2 (type: string)
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 sort order:
- Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string)
 Execution mode: vectorized, llap
 LLAP IO: no inputs
@@ -1561,10 +1561,10 @@ STAGE PLANS:
 1
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
 residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102)}
- Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1635,14 +1635,14 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: test1
- Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: key (type: int), value (type: int), col_1 (type: string)
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 sort order:
- Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string)
 Execution mode: vectorized, llap
 LLAP IO: no inputs
@@ -1650,14 +1650,14 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: test2
- Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: key (type: int), value (type: int), col_2 (type: string)
outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs @@ -1672,10 +1672,10 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {((_col1 = _col4) or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1744,16 +1744,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs @@ -1761,16 +1761,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs @@ -1785,10 +1785,10 @@ STAGE PLANS: 1 _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 6 Data size: 1267 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1711 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num 
rows: 6 Data size: 1267 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1711 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out index 382b152e47..1724751ffe 100644 --- ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out @@ -141,22 +141,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypes_parquet - Statistics: Num rows: 12288 Data size: 2428400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 1082638 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cint (type: int), ctinyint (type: tinyint), csmallint (type: smallint), cfloat (type: float), cdouble (type: double), cstring1 (type: string) outputColumnNames: cint, ctinyint, csmallint, cfloat, cdouble, cstring1 - Statistics: Num rows: 12288 Data size: 2428400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 1082638 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(cint), min(csmallint), count(cstring1), avg(cfloat), stddev_pop(cdouble) keys: ctinyint (type: tinyint) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 12288 Data size: 2428400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 128 Data size: 22404 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 12288 Data size: 2428400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 128 Data size: 22404 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: struct), _col5 (type: struct) Execution mode: vectorized, llap LLAP IO: all inputs (cache only) @@ -183,10 +183,10 @@ STAGE PLANS: keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 6144 Data size: 1214200 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 128 Data size: 4484 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6144 Data size: 1214200 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 128 Data size: 4484 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out index 22bef5a71a..489ae42013 100644 --- ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out @@ -274,7 +274,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: parquet_types - Statistics: Num rows: 22 Data size: 7040 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 4906 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -284,7 +284,7 @@ 
STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 10] - Statistics: Num rows: 22 Data size: 7040 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 4906 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(cint), min(csmallint), count(cstring1), avg(cfloat), stddev_pop(cdouble), max(cdecimal) Group By Vectorization: @@ -298,7 +298,7 @@ STAGE PLANS: keys: ctinyint (type: tinyint) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 22 Data size: 7040 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) sort order: + @@ -307,7 +307,7 @@ STAGE PLANS: className: VectorReduceSinkLongOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 22 Data size: 7040 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: struct), _col5 (type: struct), _col6 (type: decimal(4,2)) Execution mode: vectorized, llap LLAP IO: all inputs (cache only) @@ -342,7 +342,7 @@ STAGE PLANS: keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 11 Data size: 3520 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 444 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) sort order: + @@ -350,7 +350,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 11 Data size: 3520 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 444 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: double), _col5 (type: double), _col6 (type: decimal(4,2)) Reducer 3 Execution mode: vectorized, llap @@ -368,13 +368,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] - Statistics: Num rows: 11 Data size: 3520 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 444 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 11 Data size: 3520 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 444 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out index 15a301a28e..9e0647144d 100644 --- 
ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out @@ -145,7 +145,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_orc - Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:p_partkey:int, 1:p_name:string, 2:p_mfgr:string, 3:p_brand:string, 4:p_type:string, 5:p_size:int, 6:p_container:string, 7:p_retailprice:double, 8:p_comment:string, 9:ROW__ID:struct] @@ -160,7 +160,7 @@ STAGE PLANS: nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true partitionColumnNums: [2] valueColumnNums: [5, 7] - Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int), p_retailprice (type: double) Execution mode: vectorized, llap LLAP IO: all inputs @@ -190,7 +190,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) outputColumnNames: _col1, _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -204,12 +204,12 @@ STAGE PLANS: output shape: _col1: string, _col2: string, _col5: int, _col7: double partition by: _col2 raw input shape: - Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col5 (type: int), _col7 (type: double) Reducer 3 Execution mode: llap @@ -222,7 +222,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) outputColumnNames: _col1, _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -256,14 +256,14 @@ STAGE PLANS: name: sum window function: GenericUDAFSumDouble window frame: ROWS PRECEDING(MAX)~CURRENT - Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 
Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -359,7 +359,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: p1 - Statistics: Num rows: 26 Data size: 9776 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 5902 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:p_partkey:int, 1:p_name:string, 2:p_mfgr:string, 3:p_brand:string, 4:p_type:string, 5:p_size:int, 6:p_container:string, 7:p_retailprice:double, 8:p_comment:string, 9:ROW__ID:struct] @@ -369,7 +369,7 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: p_partkey is not null (type: boolean) - Statistics: Num rows: 25 Data size: 9400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 5902 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: p_partkey (type: int) sort order: + @@ -380,7 +380,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [1, 2, 5] - Statistics: Num rows: 25 Data size: 9400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 5902 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) Execution mode: vectorized, llap LLAP IO: all inputs @@ -403,7 +403,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: p2 - Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:p_partkey:int, 1:p_name:string, 2:p_mfgr:string, 3:p_brand:string, 4:p_type:string, 5:p_size:int, 6:p_container:string, 7:p_retailprice:double, 8:p_comment:string, 9:ROW__ID:struct] @@ -413,7 +413,7 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: p_partkey is not null (type: boolean) - Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: p_partkey (type: int) sort order: + @@ -424,7 +424,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [] - Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -452,12 +452,12 @@ STAGE PLANS: 0 p_partkey (type: int) 1 p_partkey (type: int) outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 27 Data size: 
10340 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 27 Data size: 6021 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 27 Data size: 10340 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 27 Data size: 6021 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col5 (type: int) Reducer 3 Execution mode: llap @@ -470,7 +470,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 27 Data size: 10340 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 27 Data size: 6021 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -484,12 +484,12 @@ STAGE PLANS: output shape: _col1: string, _col2: string, _col5: int partition by: _col2 raw input shape: - Statistics: Num rows: 27 Data size: 10340 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 27 Data size: 6021 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 27 Data size: 10340 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 27 Data size: 6021 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col5 (type: int) Reducer 4 Execution mode: llap @@ -502,7 +502,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 27 Data size: 10340 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 27 Data size: 6021 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -523,14 +523,14 @@ STAGE PLANS: window function: GenericUDAFLagEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true - Statistics: Num rows: 27 Data size: 10340 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 27 Data size: 6021 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - lag_window_0) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 27 Data size: 10340 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 27 Data size: 6129 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 27 Data size: 10340 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 27 Data size: 6129 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -618,7 +618,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_orc - Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:p_partkey:int, 1:p_name:string, 2:p_mfgr:string, 3:p_brand:string, 4:p_type:string, 5:p_size:int, 6:p_container:string, 7:p_retailprice:double, 
8:p_comment:string, 9:ROW__ID:struct] @@ -633,7 +633,7 @@ STAGE PLANS: nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true partitionColumnNums: [2] valueColumnNums: [5] - Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap LLAP IO: all inputs @@ -663,7 +663,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -677,14 +677,14 @@ STAGE PLANS: output shape: _col1: string, _col2: string, _col5: int partition by: _col2 raw input shape: - Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -777,7 +777,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_orc - Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:p_partkey:int, 1:p_name:string, 2:p_mfgr:string, 3:p_brand:string, 4:p_type:string, 5:p_size:int, 6:p_container:string, 7:p_retailprice:double, 8:p_comment:string, 9:ROW__ID:struct] @@ -792,7 +792,7 @@ STAGE PLANS: nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true partitionColumnNums: [2] valueColumnNums: [5, 7] - Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int), p_retailprice (type: double) Execution mode: vectorized, llap LLAP IO: all inputs @@ -822,7 +822,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) outputColumnNames: _col1, _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator 
Function definitions: Input definition @@ -836,12 +836,12 @@ STAGE PLANS: output shape: _col1: string, _col2: string, _col5: int, _col7: double partition by: _col2 raw input shape: - Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col5 (type: int), _col7 (type: double) Reducer 3 Execution mode: llap @@ -854,7 +854,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) outputColumnNames: _col1, _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -888,14 +888,14 @@ STAGE PLANS: name: sum window function: GenericUDAFSumDouble window frame: ROWS PRECEDING(MAX)~CURRENT - Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -996,7 +996,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_orc - Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:p_partkey:int, 1:p_name:string, 2:p_mfgr:string, 3:p_brand:string, 4:p_type:string, 5:p_size:int, 6:p_container:string, 7:p_retailprice:double, 8:p_comment:string, 9:ROW__ID:struct] @@ -1011,7 +1011,7 @@ STAGE PLANS: nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true partitionColumnNums: [2] valueColumnNums: [5] - Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap LLAP IO: all inputs @@ -1041,7 +1041,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) 
outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -1055,12 +1055,12 @@ STAGE PLANS: output shape: _col1: string, _col2: string, _col5: int partition by: _col2 raw input shape: - Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col5 (type: int) Reducer 3 Execution mode: llap @@ -1073,7 +1073,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -1108,14 +1108,14 @@ STAGE PLANS: window function: GenericUDAFLagEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true - Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), (_col5 - lag_window_2) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1218,7 +1218,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_orc - Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:p_partkey:int, 1:p_name:string, 2:p_mfgr:string, 3:p_brand:string, 4:p_type:string, 5:p_size:int, 6:p_container:string, 7:p_retailprice:double, 8:p_comment:string, 9:ROW__ID:struct] @@ -1233,7 +1233,7 @@ STAGE PLANS: nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true partitionColumnNums: [2] valueColumnNums: [5] - Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution 
mode: vectorized, llap LLAP IO: all inputs @@ -1263,7 +1263,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -1277,21 +1277,21 @@ STAGE PLANS: output shape: _col1: string, _col2: string, _col5: int partition by: _col2 raw input shape: - Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col2 (type: string), _col1 (type: string), _col5 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 2899 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 2899 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: llap Reduce Vectorization: @@ -1304,7 +1304,7 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 13 Data size: 4836 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 2899 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -1339,14 +1339,14 @@ STAGE PLANS: window function: GenericUDAFLagEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true - Statistics: Num rows: 13 Data size: 4836 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 2899 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col2 (type: int), (_col2 - lag_window_2) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 13 Data size: 4836 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 3107 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 13 Data size: 4836 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 3107 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1442,7 +1442,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_orc - Statistics: Num rows: 26 Data size: 29120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE TableScan 
Vectorization: native: true vectorizationSchemaColumns: [0:p_partkey:int, 1:p_name:string, 2:p_mfgr:string, 3:p_brand:string, 4:p_type:string, 5:p_size:int, 6:p_container:string, 7:p_retailprice:double, 8:p_comment:string, 9:ROW__ID:struct] @@ -1457,7 +1457,7 @@ STAGE PLANS: nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true partitionColumnNums: [2] valueColumnNums: [0, 3, 4, 5, 6, 7, 8] - Statistics: Num rows: 26 Data size: 29120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_partkey (type: int), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) Execution mode: vectorized, llap LLAP IO: all inputs @@ -1480,7 +1480,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: p1 - Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:p_partkey:int, 1:p_name:string, 2:p_mfgr:string, 3:p_brand:string, 4:p_type:string, 5:p_size:int, 6:p_container:string, 7:p_retailprice:double, 8:p_comment:string, 9:ROW__ID:struct] @@ -1490,7 +1490,7 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: p_partkey is not null (type: boolean) - Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: p_partkey (type: int) sort order: + @@ -1501,7 +1501,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [] - Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -1530,7 +1530,7 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: int), VALUE._col4 (type: string), VALUE._col5 (type: double), VALUE._col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 29120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 23062 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -1544,15 +1544,15 @@ STAGE PLANS: output shape: _col0: int, _col1: string, _col2: string, _col3: string, _col4: string, _col5: int, _col6: string, _col7: double, _col8: string partition by: _col2 raw input shape: - Statistics: Num rows: 26 Data size: 29120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 23062 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: _col0 is not null (type: boolean) - 
Statistics: Num rows: 26 Data size: 29120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 23062 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 26 Data size: 29120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 23062 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Reducer 3 Execution mode: llap @@ -1564,10 +1564,10 @@ STAGE PLANS: 0 _col0 (type: int) 1 p_partkey (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 28 Data size: 32032 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 27 Data size: 16713 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 28 Data size: 32032 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 27 Data size: 16713 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1658,7 +1658,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: p1 - Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:p_partkey:int, 1:p_name:string, 2:p_mfgr:string, 3:p_brand:string, 4:p_type:string, 5:p_size:int, 6:p_container:string, 7:p_retailprice:double, 8:p_comment:string, 9:ROW__ID:struct] @@ -1668,7 +1668,7 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: p_partkey is not null (type: boolean) - Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: p_partkey (type: int) sort order: + @@ -1679,7 +1679,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [] - Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -1701,7 +1701,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_orc - Statistics: Num rows: 26 Data size: 29120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:p_partkey:int, 1:p_name:string, 2:p_mfgr:string, 3:p_brand:string, 4:p_type:string, 5:p_size:int, 6:p_container:string, 7:p_retailprice:double, 8:p_comment:string, 9:ROW__ID:struct] @@ -1716,7 +1716,7 @@ STAGE PLANS: nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true partitionColumnNums: [2] valueColumnNums: [0, 3, 4, 5, 6, 7, 8] - Statistics: Num rows: 26 Data size: 29120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_partkey (type: int), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) Execution mode: vectorized, llap LLAP IO: all inputs @@ -1745,14 +1745,14 @@ STAGE PLANS: 0 p_partkey (type: int) 1 _col0 (type: int) outputColumnNames: _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 28 Data size: 32032 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 27 Data size: 16713 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 28 Data size: 32032 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 27 Data size: 16713 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 28 Data size: 32032 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 27 Data size: 16713 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1768,7 +1768,7 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: int), VALUE._col4 (type: string), VALUE._col5 (type: double), VALUE._col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 29120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 23062 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -1782,15 +1782,15 @@ STAGE PLANS: output shape: _col0: int, _col1: string, _col2: string, _col3: string, _col4: string, _col5: int, _col6: string, _col7: double, _col8: string partition by: _col2 raw input shape: - Statistics: Num rows: 26 Data size: 29120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 23062 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 26 Data size: 29120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 23062 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 26 Data size: 29120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 23062 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Stage: Stage-0 @@ -1878,7 +1878,7 @@ STAGE PLANS: Map Operator Tree: 
TableScan alias: part_orc - Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -1893,13 +1893,13 @@ STAGE PLANS: partition by: p_mfgr raw input shape: transforms raw input: true - Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE Map-side function: true Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string), p_size (type: int) sort order: ++- Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Map Vectorization: @@ -1919,7 +1919,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: int) outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -1934,12 +1934,12 @@ STAGE PLANS: partition by: _col2 raw input shape: transforms raw input: true - Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int) sort order: ++- Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: llap Reduce Vectorization: @@ -1951,7 +1951,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: int) outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -1972,14 +1972,14 @@ STAGE PLANS: window function: GenericUDAFRankEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true - Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 5902 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 5902 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2072,7 +2072,7 @@ STAGE PLANS: Map Operator Tree: TableScan 
alias: part_orc
- Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
@@ -2087,13 +2087,13 @@ STAGE PLANS:
partition by: p_mfgr
raw input shape:
transforms raw input: true
- Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
Map-side function: true
Reduce Output Operator
key expressions: p_mfgr (type: string), p_name (type: string)
sort order: ++
Map-reduce partition columns: p_mfgr (type: string)
- Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: p_size (type: int), p_retailprice (type: double)
Execution mode: llap
LLAP IO: all inputs
@@ -2114,7 +2114,7 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
outputColumnNames: _col1, _col2, _col5, _col7
- Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
@@ -2129,12 +2129,12 @@ STAGE PLANS:
partition by: _col2
raw input shape:
transforms raw input: true
- Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col2 (type: string), _col1 (type: string)
sort order: ++
Map-reduce partition columns: _col2 (type: string)
- Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col5 (type: int), _col7 (type: double)
Reducer 3
Execution mode: llap
@@ -2147,7 +2147,7 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
outputColumnNames: _col1, _col2, _col5, _col7
- Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
@@ -2181,14 +2181,14 @@ STAGE PLANS:
name: sum
window function: GenericUDAFSumDouble
window frame: ROWS PRECEDING(MAX)~CURRENT
- Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -2285,7 +2285,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: part_orc
- Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:p_partkey:int, 1:p_name:string, 2:p_mfgr:string, 3:p_brand:string, 4:p_type:string, 5:p_size:int, 6:p_container:string, 7:p_retailprice:double, 8:p_comment:string, 9:ROW__ID:struct]
@@ -2300,7 +2300,7 @@ STAGE PLANS:
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
partitionColumnNums: [2]
valueColumnNums: [5, 7]
- Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: p_size (type: int), p_retailprice (type: double)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -2330,7 +2330,7 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
outputColumnNames: _col1, _col2, _col5, _col7
- Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
@@ -2344,12 +2344,12 @@ STAGE PLANS:
output shape: _col1: string, _col2: string, _col5: int, _col7: double
partition by: _col2
raw input shape:
- Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col2 (type: string), _col1 (type: string)
sort order: ++
Map-reduce partition columns: _col2 (type: string)
- Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col5 (type: int), _col7 (type: double)
Reducer 3
Execution mode: llap
@@ -2362,7 +2362,7 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
outputColumnNames: _col1, _col2, _col5, _col7
- Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
@@ -2396,14 +2396,14 @@ STAGE PLANS:
name: sum
window function: GenericUDAFSumDouble
window frame: ROWS PRECEDING(MAX)~CURRENT
- Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -2503,7 +2503,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: part_orc
- Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:p_partkey:int, 1:p_name:string, 2:p_mfgr:string, 3:p_brand:string, 4:p_type:string, 5:p_size:int, 6:p_container:string, 7:p_retailprice:double, 8:p_comment:string, 9:ROW__ID:struct]
@@ -2518,7 +2518,7 @@ STAGE PLANS:
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
partitionColumnNums: [2]
valueColumnNums: [5, 7]
- Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: p_size (type: int), p_retailprice (type: double)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -2548,7 +2548,7 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
outputColumnNames: _col1, _col2, _col5, _col7
- Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
@@ -2562,7 +2562,7 @@ STAGE PLANS:
output shape: _col1: string, _col2: string, _col5: int, _col7: double
partition by: _col2
raw input shape:
- Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
@@ -2584,13 +2584,13 @@ STAGE PLANS:
output shape: _col1: string, _col2: string, _col5: int, _col7: double
partition by: _col2
raw input shape:
- Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE
Map-side function: true
Reduce Output Operator
key expressions: _col2 (type: string), _col1 (type: string)
sort order: ++
Map-reduce partition columns: _col2 (type: string)
- Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col5 (type: int), _col7 (type: double)
Reducer 3
Execution mode: llap
@@ -2603,7 +2603,7 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
outputColumnNames: _col1, _col2, _col5, _col7
- Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
@@ -2625,12 +2625,12 @@ STAGE PLANS:
output shape: _col1: string, _col2: string, _col5: int, _col7: double
partition by: _col2
raw input shape:
- Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col2 (type: string), _col1 (type: string)
sort order: ++
Map-reduce partition columns: _col2 (type: string)
- Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col5 (type: int), _col7 (type: double)
Reducer 4
Execution mode: llap
@@ -2643,7 +2643,7 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
outputColumnNames: _col1, _col2, _col5, _col7
- Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
@@ -2677,14 +2677,14 @@ STAGE PLANS:
name: sum
window function: GenericUDAFSumDouble
window frame: ROWS PRECEDING(MAX)~CURRENT
- Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -2791,7 +2791,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: part_orc
- Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:p_partkey:int, 1:p_name:string, 2:p_mfgr:string, 3:p_brand:string, 4:p_type:string, 5:p_size:int, 6:p_container:string, 7:p_retailprice:double, 8:p_comment:string, 9:ROW__ID:struct]
@@ -2806,7 +2806,7 @@ STAGE PLANS:
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
partitionColumnNums: [2]
valueColumnNums: [5, 7]
- Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: p_size (type: int), p_retailprice (type: double)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -2836,7 +2836,7 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
outputColumnNames: _col1, _col2, _col5, _col7
- Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
@@ -2850,12 +2850,12 @@ STAGE PLANS:
output shape: _col1: string, _col2: string, _col5: int, _col7: double
partition by: _col2
raw input shape:
- Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col2 (type: string), _col1 (type: string)
sort order: ++
Map-reduce partition columns: _col2 (type: string)
- Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col5 (type: int), _col7 (type: double)
Reducer 3
Execution mode: llap
@@ -2868,7 +2868,7 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
outputColumnNames: _col1, _col2, _col5, _col7
- Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
@@ -2894,14 +2894,14 @@ STAGE PLANS:
name: sum
window function: GenericUDAFSumDouble
window frame: ROWS PRECEDING(2)~FOLLOWING(2)
- Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col2 (type: string), _col1 (type: string), count_window_0 (type: bigint), round(sum_window_1, 2) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 6110 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 6110 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -3013,7 +3013,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: part_orc
- Statistics: Num rows: 26 Data size: 9984 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 6110 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:p_partkey:int, 1:p_name:string, 2:p_mfgr:string, 3:p_brand:string, 4:p_type:string, 5:p_size:int, 6:p_container:string, 7:p_retailprice:double, 8:p_comment:string, 9:ROW__ID:struct]
@@ -3028,7 +3028,7 @@ STAGE PLANS:
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
partitionColumnNums: [2]
valueColumnNums: [0, 5, 7]
- Statistics: Num rows: 26 Data size: 9984 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 6110 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: p_partkey (type: int), p_size (type: int), p_retailprice (type: double)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -3051,7 +3051,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: p1
- Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:p_partkey:int, 1:p_name:string, 2:p_mfgr:string, 3:p_brand:string, 4:p_type:string, 5:p_size:int, 6:p_container:string, 7:p_retailprice:double, 8:p_comment:string, 9:ROW__ID:struct]
@@ -3061,7 +3061,7 @@ STAGE PLANS:
native: true
predicateExpression: SelectColumnIsNotNull(col 0:int)
predicate: p_partkey is not null (type: boolean)
- Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: p_partkey (type: int)
sort order: +
@@ -3072,7 +3072,7 @@ STAGE PLANS:
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
valueColumnNums: []
- Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
@@ -3101,7 +3101,7 @@ STAGE PLANS:
Select Operator
expressions: VALUE._col0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
outputColumnNames: _col0, _col1, _col2, _col5, _col7
- Statistics: Num rows: 26 Data size: 9984 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 13078 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
@@ -3115,15 +3115,15 @@ STAGE PLANS:
output shape: _col0: int, _col1: string, _col2: string, _col5: int, _col7: double
partition by: _col2
raw input shape:
- Statistics: Num rows: 26 Data size: 9984 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 13078 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: _col0 is not null (type: boolean)
- Statistics: Num rows: 26 Data size: 9984 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 13078 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 26 Data size: 9984 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 13078 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double)
Reducer 3
Execution mode: llap
@@ -3135,12 +3135,12 @@ STAGE PLANS:
0 _col0 (type: int)
1 p_partkey (type: int)
outputColumnNames: _col1, _col2, _col5, _col7
- Statistics: Num rows: 28 Data size: 10982 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 27 Data size: 6237 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col2 (type: string), _col1 (type: string)
sort order: ++
Map-reduce partition columns: _col2 (type: string)
- Statistics: Num rows: 28 Data size: 10982 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 27 Data size: 6237 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col5 (type: int), _col7 (type: double)
Reducer 4
Execution mode: llap
@@ -3153,7 +3153,7 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
outputColumnNames: _col1, _col2, _col5, _col7
- Statistics: Num rows: 28 Data size: 10982 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 27 Data size: 20709 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
@@ -3200,14 +3200,14 @@ STAGE PLANS:
window function: GenericUDAFLagEvaluator
window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
- Statistics: Num rows: 28 Data size: 10982 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 27 Data size: 20709 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), count_window_2 (type: bigint), _col7 (type: double), round(sum_window_3, 2) (type: double), _col5 (type: int), (_col5 - lag_window_4) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 28 Data size: 10982 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 27 Data size: 6993 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 28 Data size: 10982 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 27 Data size: 6993 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -3306,7 +3306,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: part_orc
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:p_partkey:int, 1:p_name:string, 2:p_mfgr:string, 3:p_brand:string, 4:p_type:string, 5:p_size:int, 6:p_container:string, 7:p_retailprice:double, 8:p_comment:string, 9:ROW__ID:struct]
@@ -3321,7 +3321,7 @@ STAGE PLANS:
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
partitionColumnNums: [2]
valueColumnNums: [5]
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: p_size (type: int)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -3351,7 +3351,7 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
outputColumnNames: _col1, _col2, _col5
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
@@ -3365,21 +3365,21 @@ STAGE PLANS:
output shape: _col1: string, _col2: string, _col5: int
partition by: _col2
raw input shape:
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int)
outputColumnNames: _col1, _col2, _col5
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
keys: _col2 (type: string), _col1 (type: string), _col5 (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 2899 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int)
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int)
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 2899 Basic stats: COMPLETE Column stats: COMPLETE
Reducer 3
Execution mode: vectorized, llap
Reduce Vectorization:
@@ -3407,13 +3407,13 @@ STAGE PLANS:
keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 13 Data size: 4836 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 2899 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 13 Data size: 4836 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 2899 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -3522,7 +3522,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: part_orc
- Statistics: Num rows: 26 Data size: 9776 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 5148 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:p_partkey:int, 1:p_name:string, 2:p_mfgr:string, 3:p_brand:string, 4:p_type:string, 5:p_size:int, 6:p_container:string, 7:p_retailprice:double, 8:p_comment:string, 9:ROW__ID:struct]
@@ -3533,7 +3533,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [2, 3, 7]
- Statistics: Num rows: 26 Data size: 9776 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 5148 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: sum(p_retailprice)
Group By Vectorization:
@@ -3547,7 +3547,7 @@ STAGE PLANS:
keys: p_mfgr (type: string), p_brand (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 26 Data size: 9776 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 2574 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
sort order: ++
@@ -3559,7 +3559,7 @@ STAGE PLANS:
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
partitionColumnNums: [0]
valueColumnNums: [2]
- Statistics: Num rows: 26 Data size: 9776 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 2574 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: double)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -3591,11 +3591,11 @@ STAGE PLANS:
keys: KEY._col0 (type: string), KEY._col1 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 13 Data size: 4888 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 2574 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), _col1 (type: string), round(_col2, 2) (type: double)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 13 Data size: 4888 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 2574 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
@@ -3609,12 +3609,12 @@ STAGE PLANS:
output shape: _col0: string, _col1: string, _col2: double
partition by: _col0
raw input shape:
- Statistics: Num rows: 13 Data size: 4888 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 2574 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
sort order: ++
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 13 Data size: 4888 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 2574 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: double)
Reducer 3
Execution mode: llap
@@ -3627,7 +3627,7 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: double)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 13 Data size: 4888 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 2574 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
@@ -3647,14 +3647,14 @@ STAGE PLANS:
name: sum
window function: GenericUDAFSumDouble
window frame: ROWS PRECEDING(2)~CURRENT
- Statistics: Num rows: 13 Data size: 4888 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 2574 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), round(sum_window_0, 2) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 13 Data size: 4888 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 2678 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 13 Data size: 4888 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 2678 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -3797,15 +3797,17 @@ STAGE PLANS:
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
- Reducer 4 <- Reducer 2 (SIMPLE_EDGE)
- Reducer 5 <- Reducer 4 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
+ Reducer 5 <- Reducer 2 (SIMPLE_EDGE)
+ Reducer 6 <- Reducer 5 (SIMPLE_EDGE)
+ Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: part_orc
- Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:p_partkey:int, 1:p_name:string, 2:p_mfgr:string, 3:p_brand:string, 4:p_type:string, 5:p_size:int, 6:p_container:string, 7:p_retailprice:double, 8:p_comment:string, 9:ROW__ID:struct]
@@ -3820,7 +3822,7 @@ STAGE PLANS:
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
partitionColumnNums: [2]
valueColumnNums: [5, 7]
- Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: p_size (type: int), p_retailprice (type: double)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -3850,7 +3852,7 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
outputColumnNames: _col1, _col2, _col5, _col7
- Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
@@ -3864,18 +3866,18 @@ STAGE PLANS:
output shape: _col1: string, _col2: string, _col5: int, _col7: double
partition by: _col2
raw input shape:
- Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col2 (type: string), _col1 (type: string)
sort order: ++
Map-reduce partition columns: _col2 (type: string)
- Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col5 (type: int), _col7 (type: double)
Reduce Output Operator
key expressions: _col2 (type: string), _col5 (type: int)
sort order: ++
Map-reduce partition columns: _col2 (type: string)
- Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: string)
Reducer 3
Execution mode: llap
@@ -3888,7 +3890,7 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
outputColumnNames: _col1, _col2, _col5, _col7
- Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
@@ -3922,31 +3924,64 @@ STAGE PLANS:
name: sum
window function: GenericUDAFSumDouble
window frame: ROWS PRECEDING(MAX)~CURRENT
- Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.part_4
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: double)
+ outputColumnNames: p_mfgr, p_name, p_size, r, dr, s
+ Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: compute_stats(p_mfgr, 'hll'), compute_stats(p_name, 'hll'), compute_stats(p_size, 'hll'), compute_stats(r, 'hll'), compute_stats(dr, 'hll'), compute_stats(s, 'hll')
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 1 Data size: 2576 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 2576 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct)
Reducer 4
Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported
+ vectorized: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 5
+ Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
notVectorizedReason: PTF operator: sum only UNBOUNDED start frame is supported
vectorized: false
Reduce Operator Tree:
Select Operator
expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int)
outputColumnNames: _col1, _col2, _col5
- Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
@@ -3966,18 +4001,18 @@ STAGE PLANS:
name: sum
window function: GenericUDAFSumLong
window frame: RANGE PRECEDING(5)~CURRENT
- Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: sum_window_0 (type: bigint), _col1 (type: string), _col2 (type: string), _col5 (type: int)
outputColumnNames: sum_window_0, _col1, _col2, _col5
- Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col2 (type: string), _col1 (type: string)
sort order: ++
Map-reduce partition columns: _col2 (type: string)
- Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: sum_window_0 (type: bigint), _col5 (type: int)
- Reducer 5
+ Reducer 6
Execution mode: llap
Reduce Vectorization:
enabled: true
@@ -3988,7 +4023,7 @@ STAGE PLANS:
Select Operator
expressions: VALUE._col0 (type: bigint), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int)
outputColumnNames: _col0, _col2, _col3, _col6
- Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
@@ -4029,19 +4064,52 @@ STAGE PLANS:
name: first_value
window function: GenericUDAFFirstValueEvaluator
window frame: ROWS PRECEDING(2)~FOLLOWING(2)
- Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col3 (type: string), _col2 (type: string), _col6 (type: int), UDFToInteger(round(_col0, 1)) (type: int), rank_window_1 (type: int), dense_rank_window_2 (type: int), cume_dist_window_3 (type: double), first_value_window_4 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
- Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 6422 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 26 Data size: 9880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 6422 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.part_5
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: double), _col7 (type: int)
+ outputColumnNames: p_mfgr, p_name, p_size, s2, r, dr, cud, fv1
+ Statistics: Num rows: 26 Data size: 6422 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: compute_stats(p_mfgr, 'hll'), compute_stats(p_name, 'hll'), compute_stats(p_size, 'hll'), compute_stats(s2, 'hll'), compute_stats(r, 'hll'), compute_stats(dr, 'hll'), compute_stats(cud, 'hll'), compute_stats(fv1, 'hll')
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 1 Data size: 3424 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 3424 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct)
+ Reducer 7
+ Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported
+ vectorized: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6), compute_stats(VALUE._col7)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 1 Data size: 3520 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 3520 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-3
Dependency Collection
@@ -4059,6 +4127,10 @@ STAGE PLANS:
Stage: Stage-4
Stats Work
Basic Stats Work:
+ Column Stats Desc:
+ Columns: p_mfgr, p_name, p_size, r, dr, s
+ Column Types: string, string, int, int, int, double
+ Table: default.part_4
Stage: Stage-1
Move Operator
@@ -4073,6 +4145,10 @@ STAGE PLANS:
Stage: Stage-5
Stats Work
Basic Stats Work:
+ Column Stats Desc:
+ Columns: p_mfgr, p_name, p_size, s2, r, dr, cud, fv1
+ Column Types: string, string, int, int, int, int, double, int
+ Table: default.part_5
PREHOOK: query: from noop(on part_orc
partition by p_mfgr
@@ -4248,7 +4324,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: part_orc
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:p_partkey:int, 1:p_name:string, 2:p_mfgr:string, 3:p_brand:string, 4:p_type:string, 5:p_size:int, 6:p_container:string, 7:p_retailprice:double, 8:p_comment:string, 9:ROW__ID:struct]
@@ -4262,7 +4338,7 @@ STAGE PLANS:
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
valueColumnNums: [1, 5]
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: p_name (type: string), p_size (type: int)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -4292,7 +4368,7 @@ STAGE PLANS:
Select Operator
expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int)
outputColumnNames: _col1, _col2, _col5
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
@@ -4313,7 +4389,7 @@ STAGE PLANS:
output shape: _col1: string, _col2: string, _col5: int
partition by: _col2
raw input shape:
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
@@ -4335,13 +4411,13 @@ STAGE PLANS:
output shape: _col1: string, _col2: string, _col5: int
partition by: _col2, _col1
raw input shape:
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
Map-side function: true
Reduce Output Operator
key expressions: _col2 (type: string), _col1 (type: string)
sort order: ++
Map-reduce partition columns: _col2 (type: string), _col1 (type: string)
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col5 (type: int)
Reducer 3
Execution mode: llap
@@ -4354,7 +4430,7 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
outputColumnNames: _col1, _col2, _col5
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
@@ -4376,12 +4452,12 @@ STAGE PLANS:
output shape: _col1: string, _col2: string, _col5: int
partition by: _col2, _col1
raw input shape:
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col2 (type: string), _col1 (type: string)
sort order: ++
Map-reduce partition columns: _col2 (type: string), _col1 (type: string)
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col5 (type: int)
Reducer 4
Execution mode: llap
@@ -4394,7 +4470,7 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
outputColumnNames: _col1, _col2, _col5
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
@@ -4428,14 +4504,14 @@ STAGE PLANS:
name: sum
window function: GenericUDAFSumLong
window frame: ROWS PRECEDING(MAX)~CURRENT
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -4566,7 +4642,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: part_orc
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:p_partkey:int, 1:p_name:string, 2:p_mfgr:string, 3:p_brand:string, 4:p_type:string, 5:p_size:int, 6:p_container:string, 7:p_retailprice:double, 8:p_comment:string, 9:ROW__ID:struct]
@@ -4580,7 +4656,7 @@ STAGE PLANS:
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
valueColumnNums: [1, 5]
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: p_name (type: string), p_size (type: int)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -4610,7 +4686,7 @@ STAGE PLANS:
Select Operator
expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int)
outputColumnNames: _col1, _col2, _col5
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
@@ -4631,12 +4707,12 @@ STAGE PLANS:
output shape: _col1: string, _col2: string, _col5: int
partition by: _col2
raw input shape:
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col2 (type: string), _col1 (type: string)
sort order: ++
Map-reduce partition columns: _col2 (type: string), _col1 (type: string)
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col5 (type: int)
Reducer 3
Execution mode: llap
@@ -4649,7 +4725,7 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
outputColumnNames: _col1, _col2, _col5
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
@@ -4663,12 +4739,12 @@ STAGE PLANS:
output shape: _col1: string, _col2: string, _col5: int
partition by: _col2, _col1
raw input shape:
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col2 (type: string)
sort order: +
Map-reduce partition columns: _col2 (type: string)
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: string), _col5 (type: int)
Reducer 4
Execution mode: llap
@@ -4681,7 +4757,7 @@ STAGE PLANS:
Select Operator
expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int)
outputColumnNames: _col1, _col2, _col5
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
@@ -4695,12 +4771,12 @@ STAGE PLANS:
output shape: _col1: string, _col2: string, _col5: int
partition by: _col2
raw input shape:
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col2 (type: string), _col1 (type: string)
sort order: ++
Map-reduce partition columns: _col2 (type: string)
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col5 (type: int)
Reducer 5
Execution mode: llap
@@ -4713,7 +4789,7 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
outputColumnNames: _col1, _col2, _col5
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
@@ -4747,14 +4823,14 @@ STAGE PLANS:
name: sum
window function: GenericUDAFSumLong
window frame: ROWS PRECEDING(MAX)~CURRENT
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -4880,7 +4956,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: part_orc
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:p_partkey:int, 1:p_name:string, 2:p_mfgr:string, 3:p_brand:string, 4:p_type:string, 5:p_size:int, 6:p_container:string, 7:p_retailprice:double, 8:p_comment:string, 9:ROW__ID:struct]
@@ -4894,7 +4970,7 @@ STAGE PLANS:
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
valueColumnNums: [5]
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: p_size (type: int)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -4924,7 +5000,7 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
outputColumnNames: _col1, _col2, _col5
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
@@ -4945,12 +5021,12 @@ STAGE PLANS:
output shape: _col1: string, _col2: string, _col5: int
partition by: _col2, _col1
raw input shape:
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col2 (type: string)
sort order: +
Map-reduce partition columns: _col2 (type: string)
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: string), _col5 (type: int)
Reducer 3
Execution mode: llap
@@ -4963,7 +5039,7 @@ STAGE PLANS:
Select Operator
expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int)
outputColumnNames: _col1, _col2, _col5
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
@@ -4984,12 +5060,12 @@ STAGE PLANS:
output shape: _col1: string, _col2: string, _col5: int
partition by: _col2
raw input shape:
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col2 (type: string), _col1 (type: string)
sort order: ++
Map-reduce partition columns: _col2 (type: string)
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col5 (type: int)
Reducer 4
Execution mode: vectorized, llap
@@ -5014,7 +5090,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [1, 0, 2]
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
@@ -5061,7 +5137,7 @@ STAGE PLANS:
outputTypes: [int, int, bigint, string, string, int]
partitionExpressions: [col 0:string]
streamingColumns: [3, 4]
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
@@ -5069,13 +5145,13 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 3, 4, 2, 5]
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -5202,7 +5278,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: part_orc
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:p_partkey:int, 1:p_name:string, 2:p_mfgr:string, 3:p_brand:string, 4:p_type:string, 5:p_size:int, 6:p_container:string, 7:p_retailprice:double, 8:p_comment:string, 9:ROW__ID:struct]
@@ -5216,7 +5292,7 @@ STAGE PLANS:
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
valueColumnNums: [5]
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: p_size (type: int)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -5246,7 +5322,7 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
outputColumnNames: _col1, _col2, _col5
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
@@ -5267,12 +5343,12 @@ STAGE PLANS:
output shape: _col1: string, _col2: string, _col5: int
partition by: _col2, _col1
raw input shape:
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col2 (type: string)
sort order: +
Map-reduce partition columns: _col2 (type: string)
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: string), _col5 (type: int)
Reducer 3
Execution mode: llap
@@ -5285,7 +5361,7 @@ STAGE PLANS:
Select Operator
expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int)
outputColumnNames: _col1, _col2, _col5
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
@@ -5299,7 +5375,7 @@ STAGE PLANS:
output shape: _col1: string, _col2: string, _col5: int
partition by: _col2
raw input shape:
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
@@ -5314,13 +5390,13 @@ STAGE PLANS:
partition by: _col2, _col1
raw input shape:
transforms raw input: true
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
Map-side function: true
Reduce Output Operator
key expressions: _col2 (type: string), _col1 (type: string)
sort order: ++
Map-reduce partition columns: _col2 (type: string), _col1 (type: string)
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col5 (type: int)
Reducer 4
Execution mode: llap
@@ -5333,7 +5409,7 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
outputColumnNames: _col1, _col2, _col5
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
@@ -5348,12 +5424,12 @@ STAGE PLANS:
partition by: _col2, _col1
raw input shape:
transforms raw input: true
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col2 (type: string), _col1 (type: string)
sort order: ++
Map-reduce partition columns: _col2 (type: string), _col1 (type: string)
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col5 (type: int)
Reducer 5
Execution mode: llap
@@ -5366,7 +5442,7 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
outputColumnNames: _col1, _col2, _col5
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
@@ -5400,14 +5476,14 @@ STAGE PLANS:
name: sum
window function: GenericUDAFSumLong
window frame: ROWS PRECEDING(MAX)~CURRENT
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -5535,7 +5611,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: part_orc
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:p_partkey:int, 1:p_name:string, 2:p_mfgr:string, 3:p_brand:string, 4:p_type:string, 5:p_size:int, 6:p_container:string, 7:p_retailprice:double, 8:p_comment:string, 9:ROW__ID:struct]
@@ -5549,7 +5625,7 @@ STAGE PLANS:
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
valueColumnNums: [5]
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: p_size (type: int)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -5579,7 +5655,7 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
outputColumnNames: _col1, _col2, _col5
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
@@ -5593,7 +5669,7 @@ STAGE PLANS:
output shape: _col1: string, _col2: string, _col5: int
partition by: _col2, _col1
raw input shape:
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
@@ -5615,13 +5691,13 @@ STAGE PLANS:
output shape: _col1: string, _col2: string, _col5: int
partition by: _col2
raw input shape:
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
Map-side function: true
Reduce Output Operator
key expressions: _col2 (type: string)
sort order: +
Map-reduce partition columns: _col2 (type: string)
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: string), _col5 (type: int)
Reducer 3
Execution mode: llap
@@ -5634,7 +5710,7 @@ STAGE PLANS:
Select Operator
expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int)
outputColumnNames: _col1, _col2, _col5
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
@@ -5656,12 +5732,12 @@ STAGE PLANS:
output shape: _col1: string, _col2: string, _col5: int
partition by: _col2
raw input shape:
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col2 (type: string), _col1 (type: string)
sort order: ++
Map-reduce partition columns: _col2 (type: string), _col1 (type: string)
- Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col5 (type: int)
Reducer 4
Execution mode: llap
@@ -5674,7 +5750,7 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
outputColumnNames: _col1, _col2, _col5
- Statistics: Num rows: 26 Data size: 9672 Basic stats:
COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -5708,14 +5784,14 @@ STAGE PLANS: name: sum window function: GenericUDAFSumLong window frame: ROWS PRECEDING(MAX)~CURRENT - Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 6422 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 6422 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -5837,7 +5913,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_orc - Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:p_partkey:int, 1:p_name:string, 2:p_mfgr:string, 3:p_brand:string, 4:p_type:string, 5:p_size:int, 6:p_container:string, 7:p_retailprice:double, 8:p_comment:string, 9:ROW__ID:struct] @@ -5851,7 +5927,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [5] - Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap LLAP IO: all inputs @@ -5881,7 +5957,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -5902,7 +5978,7 @@ STAGE PLANS: output shape: _col1: string, _col2: string, _col5: int partition by: _col2, _col1 raw input shape: - Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -5917,13 +5993,13 @@ STAGE PLANS: partition by: _col2, _col1 raw input shape: transforms raw input: true - Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Map-side function: true Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) sort order: ++ Map-reduce 
partition columns: _col2 (type: string), _col1 (type: string) - Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col5 (type: int) Reducer 3 Execution mode: llap @@ -5936,7 +6012,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -5951,12 +6027,12 @@ STAGE PLANS: partition by: _col2, _col1 raw input shape: transforms raw input: true - Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col5 (type: int) Reducer 4 Execution mode: vectorized, llap @@ -5981,7 +6057,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [1, 0, 2] - Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -6028,7 +6104,7 @@ STAGE PLANS: outputTypes: [int, int, bigint, string, string, int] partitionExpressions: [col 0:string] streamingColumns: [3, 4] - Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 @@ -6036,13 +6112,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 3, 4, 2, 5, 5] - Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 6422 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 26 Data size: 9672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 6422 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out index aadc435d5e..ab2ac79b12 100644 --- ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out @@ -40,14 +40,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test - Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: 
NONE + Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ts (type: timestamp) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -125,7 +125,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test - Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ts:timestamp, 1:ROW__ID:struct] @@ -136,7 +136,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(ts), max(ts) Group By Vectorization: @@ -148,7 +148,7 @@ STAGE PLANS: projectedOutputColumnNums: [0, 1] mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: Reduce Sink Vectorization: @@ -157,7 +157,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [0, 1] - Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: timestamp), _col1 (type: timestamp) Execution mode: vectorized, llap LLAP IO: all inputs @@ -203,7 +203,7 @@ STAGE PLANS: projectedOutputColumnNums: [0, 1] mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: timestamp), _col1 (type: timestamp), (_col1 - _col0) (type: interval_day_time) outputColumnNames: _col0, _col1, _col2 @@ -212,13 +212,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0, 1, 2] selectExpressions: TimestampColSubtractTimestampColumn(col 1:timestamp, col 0:timestamp) -> 2:interval_day_time - Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ 
-262,7 +262,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test - Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ts:timestamp, 1:ROW__ID:struct] @@ -272,7 +272,7 @@ STAGE PLANS: native: true predicateExpression: FilterTimestampColumnInList(col 0:timestamp, values [0001-01-01 00:00:00.0, 0002-02-02 00:00:00.0]) predicate: (ts) IN (0001-01-01 00:00:00.0, 0002-02-02 00:00:00.0) (type: boolean) - Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ts (type: timestamp) outputColumnNames: _col0 @@ -280,13 +280,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -350,7 +350,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test - Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ts:timestamp, 1:ROW__ID:struct] @@ -361,7 +361,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: avg(ts) Group By Vectorization: @@ -373,7 +373,7 @@ STAGE PLANS: projectedOutputColumnNums: [0] mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: Reduce Sink Vectorization: @@ -382,7 +382,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [0] - Statistics: Num rows: 1 Data size: 152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: struct) Execution mode: vectorized, llap LLAP IO: all inputs @@ -428,7 +428,7 @@ STAGE PLANS: projectedOutputColumnNums: [0] mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: double), CAST( _col0 AS TIMESTAMP) (type: timestamp) outputColumnNames: _col0, _col1 @@ -437,13 +437,13 @@ STAGE PLANS: 
native: true projectedOutputColumnNums: [0, 1] selectExpressions: CastDoubleToTimestamp(col 0:double) -> 1:timestamp - Statistics: Num rows: 1 Data size: 152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -490,7 +490,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test - Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ts:timestamp, 1:ROW__ID:struct] @@ -501,7 +501,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: variance(ts), var_pop(ts), var_samp(ts), std(ts), stddev(ts), stddev_pop(ts), stddev_samp(ts) Group By Vectorization: @@ -513,7 +513,7 @@ STAGE PLANS: projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 1 Data size: 600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: Reduce Sink Vectorization: @@ -522,7 +522,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [0, 1, 2, 3, 4, 5, 6] - Statistics: Num rows: 1 Data size: 600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) Execution mode: vectorized, llap LLAP IO: all inputs @@ -568,13 +568,13 @@ STAGE PLANS: projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 1 Data size: 600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out index e07db23780..7986494d5e 100644 --- 
ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out @@ -109,7 +109,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_string - Statistics: Num rows: 40 Data size: 1560 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -120,7 +120,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [3, 4, 5, 6, 7, 8, 9, 10, 11] selectExpressions: VectorUDFUnixTimeStampTimestamp(col 0:timestamp) -> 3:bigint, VectorUDFYearTimestamp(col 0:timestamp, field YEAR) -> 4:int, VectorUDFMonthTimestamp(col 0:timestamp, field MONTH) -> 5:int, VectorUDFDayOfMonthTimestamp(col 0:timestamp, field DAY_OF_MONTH) -> 6:int, VectorUDFDayOfMonthTimestamp(col 0:timestamp, field DAY_OF_MONTH) -> 7:int, VectorUDFWeekOfYearTimestamp(col 0:timestamp, field WEEK_OF_YEAR) -> 8:int, VectorUDFHourTimestamp(col 0:timestamp, field HOUR_OF_DAY) -> 9:int, VectorUDFMinuteTimestamp(col 0:timestamp, field MINUTE) -> 10:int, VectorUDFSecondTimestamp(col 0:timestamp, field SECOND) -> 11:int - Statistics: Num rows: 40 Data size: 1560 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + @@ -128,7 +128,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 40 Data size: 1560 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs @@ -157,13 +157,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] - Statistics: Num rows: 40 Data size: 1560 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 40 Data size: 1560 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -291,7 +291,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_string - Statistics: Num rows: 40 Data size: 7176 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -302,7 +302,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [3, 4, 5, 6, 7, 8, 9, 10, 11] selectExpressions: VectorUDFUnixTimeStampString(col 1:string) -> 3:bigint, VectorUDFYearString(col 1:string, fieldStart 0, fieldLength 4) -> 4:int, VectorUDFMonthString(col 1:string, fieldStart 5, fieldLength 2) -> 
5:int, VectorUDFDayOfMonthString(col 1:string, fieldStart 8, fieldLength 2) -> 6:int, VectorUDFDayOfMonthString(col 1:string, fieldStart 8, fieldLength 2) -> 7:int, VectorUDFWeekOfYearString(col 1:string) -> 8:int, VectorUDFHourString(col 1:string, fieldStart 11, fieldLength 2) -> 9:int, VectorUDFMinuteString(col 1:string, fieldStart 14, fieldLength 2) -> 10:int, VectorUDFSecondString(col 1:string, fieldStart 17, fieldLength 2) -> 11:int - Statistics: Num rows: 40 Data size: 7176 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + @@ -310,7 +310,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 40 Data size: 7176 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs @@ -339,13 +339,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] - Statistics: Num rows: 40 Data size: 7176 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 40 Data size: 7176 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -473,7 +473,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_string - Statistics: Num rows: 40 Data size: 8736 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 1684 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -484,7 +484,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [5, 6, 7, 8, 9, 10, 11, 12, 13] selectExpressions: LongColEqualLongColumn(col 3:bigint, col 4:bigint)(children: VectorUDFUnixTimeStampTimestamp(col 0:timestamp) -> 3:bigint, VectorUDFUnixTimeStampString(col 1:string) -> 4:bigint) -> 5:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFYearTimestamp(col 0:timestamp, field YEAR) -> 3:int, VectorUDFYearString(col 1:string, fieldStart 0, fieldLength 4) -> 4:int) -> 6:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFMonthTimestamp(col 0:timestamp, field MONTH) -> 3:int, VectorUDFMonthString(col 1:string, fieldStart 5, fieldLength 2) -> 4:int) -> 7:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFDayOfMonthTimestamp(col 0:timestamp, field DAY_OF_MONTH) -> 3:int, VectorUDFDayOfMonthString(col 1:string, fieldStart 8, fieldLength 2) -> 4:int) -> 8:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFDayOfMonthTimestamp(col 0:timestamp, field DAY_OF_MONTH) -> 
3:int, VectorUDFDayOfMonthString(col 1:string, fieldStart 8, fieldLength 2) -> 4:int) -> 9:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFWeekOfYearTimestamp(col 0:timestamp, field WEEK_OF_YEAR) -> 3:int, VectorUDFWeekOfYearString(col 1:string) -> 4:int) -> 10:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFHourTimestamp(col 0:timestamp, field HOUR_OF_DAY) -> 3:int, VectorUDFHourString(col 1:string, fieldStart 11, fieldLength 2) -> 4:int) -> 11:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFMinuteTimestamp(col 0:timestamp, field MINUTE) -> 3:int, VectorUDFMinuteString(col 1:string, fieldStart 14, fieldLength 2) -> 4:int) -> 12:boolean, LongColEqualLongColumn(col 3:int, col 4:int)(children: VectorUDFSecondTimestamp(col 0:timestamp, field SECOND) -> 3:int, VectorUDFSecondString(col 1:string, fieldStart 17, fieldLength 2) -> 4:int) -> 13:boolean - Statistics: Num rows: 40 Data size: 8736 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: boolean) sort order: + @@ -492,7 +492,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 40 Data size: 8736 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean) Execution mode: vectorized, llap LLAP IO: all inputs @@ -521,13 +521,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] - Statistics: Num rows: 40 Data size: 8736 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 40 Data size: 8736 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -655,7 +655,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_wrong - Statistics: Num rows: 3 Data size: 552 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 309 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -666,7 +666,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [2, 3, 4, 5, 6, 7, 8, 9, 10] selectExpressions: VectorUDFUnixTimeStampString(col 0:string) -> 2:bigint, VectorUDFYearString(col 0:string, fieldStart 0, fieldLength 4) -> 3:int, VectorUDFMonthString(col 0:string, fieldStart 5, fieldLength 2) -> 4:int, VectorUDFDayOfMonthString(col 0:string, fieldStart 8, fieldLength 2) -> 5:int, VectorUDFDayOfMonthString(col 0:string, fieldStart 8, fieldLength 2) -> 6:int, VectorUDFWeekOfYearString(col 0:string) -> 7:int, VectorUDFHourString(col 
0:string, fieldStart 11, fieldLength 2) -> 8:int, VectorUDFMinuteString(col 0:string, fieldStart 14, fieldLength 2) -> 9:int, VectorUDFSecondString(col 0:string, fieldStart 17, fieldLength 2) -> 10:int - Statistics: Num rows: 3 Data size: 552 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + @@ -674,7 +674,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 3 Data size: 552 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs @@ -703,13 +703,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] - Statistics: Num rows: 3 Data size: 552 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 3 Data size: 552 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -788,7 +788,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_string - Statistics: Num rows: 40 Data size: 1560 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -798,7 +798,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 40 Data size: 1560 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(ctimestamp1), max(ctimestamp1), count(ctimestamp1), count() Group By Vectorization: @@ -810,14 +810,14 @@ STAGE PLANS: projectedOutputColumnNums: [0, 1, 2, 3] mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 136 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 136 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: timestamp), _col1 (type: timestamp), _col2 (type: bigint), 
_col3 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs @@ -850,13 +850,13 @@ STAGE PLANS: projectedOutputColumnNums: [0, 1, 2, 3] mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 136 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 136 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -915,7 +915,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_string - Statistics: Num rows: 40 Data size: 1560 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -925,7 +925,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 40 Data size: 1560 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(ctimestamp1) Group By Vectorization: @@ -937,14 +937,14 @@ STAGE PLANS: projectedOutputColumnNums: [0] mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: double) Execution mode: vectorized, llap LLAP IO: all inputs @@ -977,7 +977,7 @@ STAGE PLANS: projectedOutputColumnNums: [0] mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: round(_col0, 3) (type: double) outputColumnNames: _col0 @@ -986,13 +986,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [1] selectExpressions: RoundWithNumDigitsDoubleToDouble(col 0, decimalPlaces 3) -> 1:double - Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1059,7 +1059,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: 
alltypesorc_string - Statistics: Num rows: 40 Data size: 1560 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -1069,7 +1069,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 40 Data size: 1560 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: avg(ctimestamp1), variance(ctimestamp1), var_pop(ctimestamp1), var_samp(ctimestamp1), std(ctimestamp1), stddev(ctimestamp1), stddev_pop(ctimestamp1), stddev_samp(ctimestamp1) Group By Vectorization: @@ -1081,14 +1081,14 @@ STAGE PLANS: projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7] mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) Execution mode: vectorized, llap LLAP IO: all inputs @@ -1121,7 +1121,7 @@ STAGE PLANS: projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7] mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: round(_col0, 0) (type: double), _col1 BETWEEN 8.97077295279421E19 AND 8.97077295279422E19 (type: boolean), _col2 BETWEEN 8.97077295279421E19 AND 8.97077295279422E19 (type: boolean), _col3 BETWEEN 9.20684592523616E19 AND 9.20684592523617E19 (type: boolean), round(_col4, 3) (type: double), round(_col5, 3) (type: double), round(_col6, 3) (type: double), round(_col7, 3) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 @@ -1130,13 +1130,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [8, 9, 10, 11, 12, 13, 14, 15] selectExpressions: RoundWithNumDigitsDoubleToDouble(col 0, decimalPlaces 0) -> 8:double, VectorUDFAdaptor(_col1 BETWEEN 8.97077295279421E19 AND 8.97077295279422E19) -> 9:boolean, VectorUDFAdaptor(_col2 BETWEEN 8.97077295279421E19 AND 8.97077295279422E19) -> 10:boolean, VectorUDFAdaptor(_col3 BETWEEN 9.20684592523616E19 AND 9.20684592523617E19) -> 11:boolean, RoundWithNumDigitsDoubleToDouble(col 4, decimalPlaces 3) -> 12:double, RoundWithNumDigitsDoubleToDouble(col 5, decimalPlaces 3) -> 13:double, RoundWithNumDigitsDoubleToDouble(col 6, decimalPlaces 3) -> 14:double, RoundWithNumDigitsDoubleToDouble(col 7, decimalPlaces 3) -> 15:double - Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: 
NONE + Statistics: Num rows: 1 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/load_dyn_part1.q.out ql/src/test/results/clientpositive/load_dyn_part1.q.out index b43bd31128..74d79f9f90 100644 --- ql/src/test/results/clientpositive/load_dyn_part1.q.out +++ ql/src/test/results/clientpositive/load_dyn_part1.q.out @@ -56,17 +56,13 @@ STAGE DEPENDENCIES: Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-10 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 - Stage-14 depends on stages: Stage-2 , consists of Stage-11, Stage-10, Stage-12 - Stage-11 - Stage-1 depends on stages: Stage-11, Stage-10, Stage-13 - Stage-9 depends on stages: Stage-1 - Stage-10 - Stage-12 - Stage-13 depends on stages: Stage-12 + Stage-1 depends on stages: Stage-2 + Stage-9 depends on stages: Stage-1, Stage-10 + Stage-10 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -90,6 +86,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) Filter Operator predicate: (ds > '2008-04-08') (type: boolean) Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE @@ -105,6 +117,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, hr + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: '2008-12-31' (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -131,6 +177,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part1 Stage: Stage-4 Map Reduce @@ -162,15 +212,6 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-14 - Conditional Operator - - Stage: Stage-11 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-1 Move Operator tables: @@ -187,36 +228,39 @@ STAGE PLANS: Stage: Stage-9 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part2 Stage: Stage-10 Map Reduce Map Operator Tree: TableScan + Reduce Output Operator + key expressions: '2008-12-31' (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: '2008-12-31' (type: string), _col1 (type: string) + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: '2008-12-31' (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), '2008-12-31' (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part2 - - Stage: Stage-12 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: 
default.nzhang_part2
-
-  Stage: Stage-13
-    Move Operator
-      files:
-          hdfs directory: true
-#### A masked pattern was here ####
PREHOOK: query: from srcpart
insert overwrite table nzhang_part1 partition (ds, hr) select key, value, ds, hr where ds <= '2008-04-08'
diff --git ql/src/test/results/clientpositive/load_dyn_part10.q.out ql/src/test/results/clientpositive/load_dyn_part10.q.out
index aea7798d0b..38fd78de95 100644
--- ql/src/test/results/clientpositive/load_dyn_part10.q.out
+++ ql/src/test/results/clientpositive/load_dyn_part10.q.out
@@ -65,6 +65,40 @@ STAGE PLANS:
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                  name: default.nzhang_part10
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: string), '2008-12-31' (type: string), _col2 (type: string)
+            outputColumnNames: key, value, ds, hr
+            Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+              keys: ds (type: string), hr (type: string)
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col0 (type: string), _col1 (type: string)
+                sort order: ++
+                Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+                value expressions: _col2 (type: struct), _col3 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          keys: KEY._col0 (type: string), KEY._col1 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string)
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

  Stage: Stage-0
    Move Operator
@@ -82,6 +116,10 @@ STAGE PLANS:
  Stage: Stage-2
    Stats Work
      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.nzhang_part10

PREHOOK: query: from srcpart
insert overwrite table nzhang_part10 partition(ds='2008-12-31', hr) select key, value, hr where ds > '2008-04-08'
diff --git ql/src/test/results/clientpositive/load_dyn_part13.q.out ql/src/test/results/clientpositive/load_dyn_part13.q.out
index 8c9df3e9f2..ce22104c95 100644
--- ql/src/test/results/clientpositive/load_dyn_part13.q.out
+++ ql/src/test/results/clientpositive/load_dyn_part13.q.out
@@ -84,6 +84,22 @@ STAGE PLANS:
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                  name: default.nzhang_part13
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: string), '2010-03-03' (type: string), _col2 (type: string)
+            outputColumnNames: key, value, ds, hr
+            Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+              keys: ds (type: string), hr (type: string)
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col0 (type: string), _col1 (type: string)
+                sort order: ++
+                Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
+                value expressions: _col2 (type: struct), _col3 (type: struct)
          TableScan
            alias: src
            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -104,6 +120,40 @@ STAGE PLANS:
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                  name: default.nzhang_part13
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: string), '2010-03-03' (type: string), _col2 (type: string)
+            outputColumnNames: key, value, ds, hr
+            Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+              keys: ds (type: string), hr (type: string)
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col0 (type: string), _col1 (type: string)
+                sort order: ++
+                Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
+                value expressions: _col2 (type: struct), _col3 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          keys: KEY._col0 (type: string), KEY._col1 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string)
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

  Stage: Stage-0
    Move Operator
@@ -121,6 +171,10 @@ STAGE PLANS:
  Stage: Stage-2
    Stats Work
      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.nzhang_part13

PREHOOK: query: insert overwrite table nzhang_part13 partition (ds="2010-03-03", hr)
select * from (
diff --git ql/src/test/results/clientpositive/load_dyn_part14.q.out ql/src/test/results/clientpositive/load_dyn_part14.q.out
index 8e951b6e3e..bb3a311925 100644
--- ql/src/test/results/clientpositive/load_dyn_part14.q.out
+++ ql/src/test/results/clientpositive/load_dyn_part14.q.out
@@ -70,7 +70,6 @@ STAGE PLANS:
              Reduce Output Operator
                sort order:
                Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                TopN Hash Memory Usage: 0.1
      Reduce Operator Tree:
        Limit
          Number of rows: 2
@@ -100,6 +99,22 @@ STAGE PLANS:
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                  name: default.nzhang_part14
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: string)
+            outputColumnNames: key, value
+            Statistics: Num rows: 6 Data size: 771 Basic stats: COMPLETE Column stats: COMPLETE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll')
+              keys: value (type: string)
+              mode: hash
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 2 Data size: 1050 Basic stats: COMPLETE Column stats: COMPLETE
+              Reduce Output Operator
+                key expressions: _col0 (type: string)
+                sort order: +
+                Map-reduce partition columns: _col0 (type: string)
+                Statistics: Num rows: 2 Data size: 1050 Basic stats: COMPLETE Column stats: COMPLETE
+                value expressions: _col1 (type: struct)
          TableScan
            Union
              Statistics: Num rows: 6 Data size: 938 Basic stats: COMPLETE Column stats: COMPLETE
@@ -111,6 +126,22 @@ STAGE PLANS:
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                  name: default.nzhang_part14
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: string)
+            outputColumnNames: key, value
+            Statistics: Num rows: 6 Data size: 771 Basic stats: COMPLETE Column stats: COMPLETE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll')
+              keys: value (type: string)
+              mode: hash
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 2 Data size: 1050 Basic stats: COMPLETE Column stats: COMPLETE
+              Reduce Output Operator
+                key expressions: _col0 (type: string)
+                sort order: +
+                Map-reduce partition columns: _col0 (type: string)
+                Statistics: Num rows: 2 Data size: 1050 Basic stats: COMPLETE Column stats: COMPLETE
+                value expressions: _col1 (type: struct)
          TableScan
            Union
              Statistics: Num rows: 6 Data size: 938 Basic stats: COMPLETE Column stats: COMPLETE
@@ -122,6 +153,40 @@ STAGE PLANS:
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                  name: default.nzhang_part14
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: string)
+            outputColumnNames: key, value
+            Statistics: Num rows: 6 Data size: 771 Basic stats: COMPLETE Column stats: COMPLETE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll')
+              keys: value (type: string)
+              mode: hash
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 2 Data size: 1050 Basic stats: COMPLETE Column stats: COMPLETE
+              Reduce Output Operator
+                key expressions: _col0 (type: string)
+                sort order: +
+                Map-reduce partition columns: _col0 (type: string)
+                Statistics: Num rows: 2 Data size: 1050 Basic stats: COMPLETE Column stats: COMPLETE
+                value expressions: _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 2 Data size: 1050 Basic stats: COMPLETE Column stats: COMPLETE
+          Select Operator
+            expressions: _col1 (type: struct), _col0 (type: string)
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 2 Data size: 1050 Basic stats: COMPLETE Column stats: COMPLETE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 2 Data size: 1050 Basic stats: COMPLETE Column stats: COMPLETE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

  Stage: Stage-8
    Conditional Operator
@@ -147,6 +212,10 @@ STAGE PLANS:
  Stage: Stage-3
    Stats Work
      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key
+          Column Types: string
+          Table: default.nzhang_part14

  Stage: Stage-4
    Map Reduce
@@ -192,7 +261,6 @@ STAGE PLANS:
              Reduce Output Operator
                sort order:
                Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                TopN Hash Memory Usage: 0.1
      Reduce Operator Tree:
        Limit
          Number of rows: 2
@@ -222,7 +290,6 @@ STAGE PLANS:
              Reduce Output Operator
                sort order:
                Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                TopN Hash Memory Usage: 0.1
      Reduce Operator Tree:
        Limit
          Number of rows: 2
diff --git ql/src/test/results/clientpositive/load_dyn_part2.q.out ql/src/test/results/clientpositive/load_dyn_part2.q.out
index 902212134e..b015302546 100644
--- ql/src/test/results/clientpositive/load_dyn_part2.q.out
+++ ql/src/test/results/clientpositive/load_dyn_part2.q.out
@@ -36,7 +36,8 @@ POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1

STAGE PLANS:
  Stage: Stage-1
@@ -67,6 +68,22 @@ STAGE PLANS:
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                  name: default.nzhang_part_bucket
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: string), '2010-03-23' (type: string), _col2 (type: string)
+            outputColumnNames: key, value, ds, hr
+            Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+              keys: ds (type: string), hr (type: string)
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

  Stage: Stage-0
    Move Operator
@@ -84,6 +101,39 @@ STAGE PLANS:
  Stage: Stage-2
    Stats Work
      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.nzhang_part_bucket
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string), _col1 (type: string)
+              sort order: ++
+              Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+              Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col2 (type: struct), _col3 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          keys: KEY._col0 (type: string), KEY._col1 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string)
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
PREHOOK: query: insert overwrite table nzhang_part_bucket partition (ds='2010-03-23', hr)
select key, value, hr from srcpart where ds is not null and hr is not null
PREHOOK: type: QUERY
diff --git ql/src/test/results/clientpositive/load_dyn_part3.q.out ql/src/test/results/clientpositive/load_dyn_part3.q.out
index c63b4519f3..823f0c7ded 100644
--- ql/src/test/results/clientpositive/load_dyn_part3.q.out
+++ ql/src/test/results/clientpositive/load_dyn_part3.q.out
@@ -63,6 +63,40 @@ STAGE PLANS:
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                  name: default.nzhang_part3
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
+            outputColumnNames: key, value, ds, hr
+            Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+              keys: ds (type: string), hr (type: string)
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col0 (type: string), _col1 (type: string)
+                sort order: ++
+                Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+                value expressions: _col2 (type: struct), _col3 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          keys: KEY._col0 (type: string), KEY._col1 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string)
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

  Stage: Stage-0
    Move Operator
@@ -80,6 +114,10 @@ STAGE PLANS:
  Stage: Stage-2
    Stats Work
      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.nzhang_part3

PREHOOK: query: insert overwrite table nzhang_part3 partition (ds, hr) select key, value, ds, hr from srcpart where ds is not null and hr is not null
PREHOOK: type: QUERY
diff --git ql/src/test/results/clientpositive/load_dyn_part4.q.out ql/src/test/results/clientpositive/load_dyn_part4.q.out
index cf571921fc..2ff534b038 100644
--- ql/src/test/results/clientpositive/load_dyn_part4.q.out
+++ ql/src/test/results/clientpositive/load_dyn_part4.q.out
@@ -73,6 +73,40 @@ STAGE PLANS:
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                  name: default.nzhang_part4
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
+            outputColumnNames: key, value, ds, hr
+            Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+              keys: ds (type: string), hr (type: string)
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col0 (type: string), _col1 (type: string)
+                sort order: ++
+                Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+                value expressions: _col2 (type: struct), _col3 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          keys: KEY._col0 (type: string), KEY._col1 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string)
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

  Stage: Stage-0
    Move Operator
@@ -90,6 +124,10 @@ STAGE PLANS:
  Stage: Stage-2
    Stats Work
      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.nzhang_part4

PREHOOK: query: insert overwrite table nzhang_part4 partition (ds, hr) select key, value, ds, hr from srcpart where ds is not null and hr is not null
PREHOOK: type: QUERY
diff --git ql/src/test/results/clientpositive/load_dyn_part8.q.out ql/src/test/results/clientpositive/load_dyn_part8.q.out
index 9e20fdc921..1d7e966a31 100644
--- ql/src/test/results/clientpositive/load_dyn_part8.q.out
+++ ql/src/test/results/clientpositive/load_dyn_part8.q.out
@@ -48,7 +48,8 @@ STAGE DEPENDENCIES:
  Stage-0 depends on stages: Stage-2
  Stage-3 depends on stages: Stage-0
  Stage-1 depends on stages: Stage-2
-  Stage-4 depends on stages: Stage-1
+  Stage-4 depends on stages: Stage-1, Stage-5
+  Stage-5 depends on stages: Stage-2

STAGE PLANS:
  Stage: Stage-2
@@ -95,6 +96,25 @@ STAGE PLANS:
              TotalFiles: 1
              GatherStats: true
              MultiFileSpray: false
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
+            outputColumnNames: key, value, ds, hr
+            Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+              keys: ds (type: string), hr (type: string)
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col0 (type: string), _col1 (type: string)
+                null sort order: aa
+                sort order: ++
+                Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE
+                tag: -1
+                value expressions: _col2 (type: struct), _col3 (type: struct)
+                auto parallelism: false
          Filter Operator
            isSamplingPred: false
            predicate: (ds > '2008-04-08') (type: boolean)
@@ -133,6 +153,34 @@ STAGE PLANS:
              TotalFiles: 1
              GatherStats: true
              MultiFileSpray: false
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+            outputColumnNames: key, value, hr
+            Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+              keys: '2008-12-31' (type: string), hr (type: string)
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+#### A masked pattern was here ####
+                NumFilesPerFileSink: 1
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    properties:
+                      column.name.delimiter ,
+                      columns _col0,_col1,_col2,_col3
+                      columns.types string,string,struct,struct
+                      escape.delim \
+                      serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                TotalFiles: 1
+                GatherStats: false
+                MultiFileSpray: false
      Path -> Alias:
#### A masked pattern was here ####
      Path -> Partition:
#### A masked pattern was here ####
@@ -333,6 +381,40 @@ STAGE PLANS:
        /srcpart/ds=2008-04-08/hr=12 [srcpart]
        /srcpart/ds=2008-04-09/hr=11 [srcpart]
        /srcpart/ds=2008-04-09/hr=12 [srcpart]
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          keys: KEY._col0 (type: string), KEY._col1 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string)
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+#### A masked pattern was here ####
+              NumFilesPerFileSink: 1
+              Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  properties:
+                    columns _col0,_col1,_col2,_col3
+                    columns.types struct:struct:string:string
+                    escape.delim \
+                    hive.serialization.extend.additional.nesting.levels true
+                    serialization.escape.crlf true
+                    serialization.format 1
+                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              TotalFiles: 1
+              GatherStats: false
+              MultiFileSpray: false

  Stage: Stage-0
    Move Operator
@@ -399,6 +481,87 @@ STAGE PLANS:
    Stats Work
      Basic Stats Work:
#### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.nzhang_part8
+          Is Table Level Stats: false
+
+  Stage: Stage-5
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            GatherStats: false
+            Reduce Output Operator
+              key expressions: '2008-12-31' (type: string), _col1 (type: string)
+              null sort order: aa
+              sort order: ++
+              Map-reduce partition columns: '2008-12-31' (type: string), _col1 (type: string)
+              Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE
+              tag: -1
+              value expressions: _col2 (type: struct), _col3 (type: struct)
+              auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: -mr-10004
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            properties:
+              column.name.delimiter ,
+              columns _col0,_col1,_col2,_col3
+              columns.types string,string,struct,struct
+              escape.delim \
+              serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              properties:
+                column.name.delimiter ,
+                columns _col0,_col1,_col2,_col3
+                columns.types string,string,struct,struct
+                escape.delim \
+                serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Truncated Path -> Alias:
+#### A masked pattern was here ####
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          keys: '2008-12-31' (type: string), KEY._col1 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col2 (type: struct), _col3 (type: struct), '2008-12-31' (type: string), _col1 (type: string)
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+#### A masked pattern was here ####
+              NumFilesPerFileSink: 1
+              Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  properties:
+                    columns _col0,_col1,_col2,_col3
+                    columns.types struct:struct:string:string
+                    escape.delim \
+                    hive.serialization.extend.additional.nesting.levels true
+                    serialization.escape.crlf true
+                    serialization.format 1
+                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              TotalFiles: 1
+              GatherStats: false
+              MultiFileSpray: false
PREHOOK: query: from srcpart
insert overwrite table nzhang_part8 partition (ds, hr) select key, value, ds, hr where ds <= '2008-04-08'
diff --git ql/src/test/results/clientpositive/load_dyn_part9.q.out ql/src/test/results/clientpositive/load_dyn_part9.q.out
index 77e689ca2d..a5af46dcfa 100644
--- ql/src/test/results/clientpositive/load_dyn_part9.q.out
+++ ql/src/test/results/clientpositive/load_dyn_part9.q.out
@@ -65,6 +65,40 @@ STAGE PLANS:
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                  name: default.nzhang_part9
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
+            outputColumnNames: key, value, ds, hr
+            Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+              keys: ds (type: string), hr (type: string)
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col0 (type: string), _col1 (type: string)
+                sort order: ++
+                Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+                value expressions: _col2 (type: struct), _col3 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          keys: KEY._col0 (type: string), KEY._col1 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string)
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

  Stage: Stage-0
    Move Operator
@@ -82,6 +116,10 @@ STAGE PLANS:
  Stage: Stage-2
    Stats Work
      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.nzhang_part9

PREHOOK: query: from srcpart
insert overwrite table nzhang_part9 partition (ds, hr) select key, value, ds, hr where ds <= '2008-04-08'
diff --git ql/src/test/results/clientpositive/mapreduce1.q.out ql/src/test/results/clientpositive/mapreduce1.q.out
index 602ae774af..1c83fa74b6 100644
--- ql/src/test/results/clientpositive/mapreduce1.q.out
+++ ql/src/test/results/clientpositive/mapreduce1.q.out
@@ -25,7 +25,8 @@ POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1

STAGE PLANS:
  Stage: Stage-1
@@ -64,6 +65,21 @@ STAGE PLANS:
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                  name: default.dest1
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string)
+            outputColumnNames: key, ten, one, value
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(ten, 'hll'), compute_stats(one, 'hll'), compute_stats(value, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

  Stage: Stage-0
    Move Operator
@@ -78,6 +94,32 @@ STAGE PLANS:
  Stage: Stage-2
    Stats Work
      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, ten, one, value
+          Column Types: int, int, int, string
+          Table: default.dest1
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
PREHOOK: query: FROM src
INSERT OVERWRITE TABLE dest1
diff --git ql/src/test/results/clientpositive/mapreduce2.q.out ql/src/test/results/clientpositive/mapreduce2.q.out
index a401ac0cc1..b1c68b87d5 100644
--- ql/src/test/results/clientpositive/mapreduce2.q.out
+++ ql/src/test/results/clientpositive/mapreduce2.q.out
@@ -23,7 +23,8 @@ POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1

STAGE PLANS:
  Stage: Stage-1
@@ -61,6 +62,21 @@ STAGE PLANS:
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                  name: default.dest1
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string)
+            outputColumnNames: key, ten, one, value
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(ten, 'hll'), compute_stats(one, 'hll'), compute_stats(value, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

  Stage: Stage-0
    Move Operator
@@ -75,6 +91,32 @@ STAGE PLANS:
  Stage: Stage-2
    Stats Work
      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, ten, one, value
+          Column Types: int, int, int, string
+          Table: default.dest1
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
PREHOOK: query: FROM src
INSERT OVERWRITE TABLE dest1
diff --git ql/src/test/results/clientpositive/mapreduce3.q.out ql/src/test/results/clientpositive/mapreduce3.q.out
index abc3f094c9..b50cbc9665 100644
--- ql/src/test/results/clientpositive/mapreduce3.q.out
+++ ql/src/test/results/clientpositive/mapreduce3.q.out
@@ -23,7 +23,8 @@ POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1

STAGE PLANS:
  Stage: Stage-1
@@ -61,6 +62,21 @@ STAGE PLANS:
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                  name: default.dest1
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string)
+            outputColumnNames: key, ten, one, value
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(ten, 'hll'), compute_stats(one, 'hll'), compute_stats(value, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

  Stage: Stage-0
    Move Operator
@@ -75,6 +91,32 @@ STAGE PLANS:
  Stage: Stage-2
    Stats Work
      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, ten, one, value
+          Column Types: int, int, int, string
+          Table: default.dest1
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
PREHOOK: query: FROM src
INSERT OVERWRITE TABLE dest1
diff --git ql/src/test/results/clientpositive/mapreduce4.q.out ql/src/test/results/clientpositive/mapreduce4.q.out
index 91275a7108..c61f37043b 100644
--- ql/src/test/results/clientpositive/mapreduce4.q.out
+++ ql/src/test/results/clientpositive/mapreduce4.q.out
@@ -25,7 +25,8 @@ POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1

STAGE PLANS:
  Stage: Stage-1
@@ -64,6 +65,21 @@ STAGE PLANS:
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                  name: default.dest1
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string)
+            outputColumnNames: key, ten, one, value
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(ten, 'hll'), compute_stats(one, 'hll'), compute_stats(value, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

  Stage: Stage-0
    Move Operator
@@ -78,6 +94,32 @@ STAGE PLANS:
  Stage: Stage-2
    Stats Work
      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, ten, one, value
+          Column Types: int, int, int, string
+          Table: default.dest1
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
PREHOOK: query: FROM src
INSERT OVERWRITE TABLE dest1
diff --git ql/src/test/results/clientpositive/mapreduce5.q.out ql/src/test/results/clientpositive/mapreduce5.q.out
index bb1e9e42b3..a014fc0152 100644
--- ql/src/test/results/clientpositive/mapreduce5.q.out
+++ ql/src/test/results/clientpositive/mapreduce5.q.out
@@ -23,7 +23,8 @@ POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1

STAGE PLANS:
  Stage: Stage-1
@@ -55,6 +56,21 @@ STAGE PLANS:
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                  name: default.dest1
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string)
+            outputColumnNames: key, ten, one, value
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(ten, 'hll'), compute_stats(one, 'hll'), compute_stats(value, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

  Stage: Stage-0
    Move Operator
@@ -69,6 +85,32 @@ STAGE PLANS:
  Stage: Stage-2
    Stats Work
      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, ten, one, value
+          Column Types: int, int, int, string
+          Table: default.dest1
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
PREHOOK: query: FROM src
INSERT OVERWRITE TABLE dest1
diff --git ql/src/test/results/clientpositive/mapreduce6.q.out ql/src/test/results/clientpositive/mapreduce6.q.out
index 1a31ebc1cb..75bb1524b3 100644
--- ql/src/test/results/clientpositive/mapreduce6.q.out
+++ ql/src/test/results/clientpositive/mapreduce6.q.out
@@ -23,7 +23,8 @@ POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1

STAGE PLANS:
  Stage: Stage-1
@@ -55,6 +56,21 @@ STAGE PLANS:
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                  name: default.dest1
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string)
+            outputColumnNames: key, ten, one, value
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(ten, 'hll'), compute_stats(one, 'hll'), compute_stats(value, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

  Stage: Stage-0
    Move Operator
@@ -69,6 +85,32 @@ STAGE PLANS:
  Stage: Stage-2
    Stats Work
      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, ten, one, value
+          Column Types: int, int, int, string
+          Table: default.dest1
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
PREHOOK: query: FROM src
INSERT OVERWRITE TABLE dest1
diff --git ql/src/test/results/clientpositive/mapreduce7.q.out ql/src/test/results/clientpositive/mapreduce7.q.out
index b0c24fa8a8..a342f5c49c 100644
--- ql/src/test/results/clientpositive/mapreduce7.q.out
+++ ql/src/test/results/clientpositive/mapreduce7.q.out
@@ -23,7 +23,8 @@ POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1

STAGE PLANS:
  Stage: Stage-1
@@ -61,6 +62,21 @@ STAGE PLANS:
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                  name: default.dest1
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: string)
+            outputColumnNames: k, v, key, ten, one, value
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(k, 'hll'), compute_stats(v, 'hll'), compute_stats(key, 'hll'), compute_stats(ten, 'hll'), compute_stats(one, 'hll'), compute_stats(value, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+              Statistics: Num rows: 1 Data size: 2592 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

  Stage: Stage-0
    Move Operator
@@ -75,6 +91,32 @@ STAGE PLANS:
  Stage: Stage-2
    Stats Work
      Basic Stats Work:
+      Column Stats Desc:
+          Columns: k, v, key, ten, one, value
+          Column Types: string, string, int, int, int, string
+          Table: default.dest1
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 2592 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+          Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
PREHOOK: query: FROM src
INSERT OVERWRITE TABLE dest1
diff --git ql/src/test/results/clientpositive/mapreduce8.q.out ql/src/test/results/clientpositive/mapreduce8.q.out
index fc866d558d..8a9567f66e 100644
--- ql/src/test/results/clientpositive/mapreduce8.q.out
+++ ql/src/test/results/clientpositive/mapreduce8.q.out
@@ -25,7 +25,8 @@ POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1

STAGE PLANS:
  Stage: Stage-1
@@ -64,6 +65,21 @@ STAGE PLANS:
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                  name: default.dest1
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: string)
+            outputColumnNames: k, v, key, ten, one, value
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(k, 'hll'), compute_stats(v, 'hll'), compute_stats(key, 'hll'), compute_stats(ten, 'hll'), compute_stats(one, 'hll'), compute_stats(value, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+              Statistics: Num rows: 1 Data size: 2592 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

  Stage: Stage-0
    Move Operator
@@ -78,6 +94,32 @@ STAGE PLANS:
  Stage: Stage-2
    Stats Work
      Basic Stats Work:
+      Column Stats Desc:
+          Columns: k, v, key, ten, one, value
+          Column Types: string, string, int, int, int, string
+          Table: default.dest1
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 2592 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+          Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
PREHOOK: query: FROM src
INSERT OVERWRITE TABLE dest1
diff --git ql/src/test/results/clientpositive/merge1.q.out ql/src/test/results/clientpositive/merge1.q.out
index bd8cd7bdfa..6534c6a477 100644
--- ql/src/test/results/clientpositive/merge1.q.out
+++ ql/src/test/results/clientpositive/merge1.q.out
@@ -19,10 +19,11 @@ STAGE DEPENDENCIES:
  Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5
  Stage-4
  Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-8
  Stage-3
  Stage-5
  Stage-6 depends on stages: Stage-5
+  Stage-8 depends on stages: Stage-1

STAGE PLANS:
  Stage: Stage-1
@@ -66,6 +67,21 @@ STAGE PLANS:
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                  name: default.dest1
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int)
+            outputColumnNames: key, val
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(val, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

  Stage: Stage-7
    Conditional Operator
@@ -89,6 +105,10 @@ STAGE PLANS:
  Stage: Stage-2
    Stats Work
      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, val
+          Column Types: int, int
+          Table: default.dest1

  Stage: Stage-3
    Map Reduce
@@ -120,6 +140,28 @@ STAGE PLANS:
          hdfs directory: true
#### A masked pattern was here ####
+  Stage: Stage-8
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
PREHOOK: query: insert overwrite table dest1
select key, count(1) from src group by key
PREHOOK: type: QUERY
@@ -528,6 +570,32 @@ STAGE PLANS:
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                  name: default.dest1
+          Select Operator
+            expressions: _col0 (type: string)
+            outputColumnNames: key
+            Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll')
+              mode: hash
+              outputColumnNames: _col0
+              Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                sort order:
+                Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+                value expressions: _col0 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

  Stage: Stage-7
    Conditional Operator
@@ -551,6 +619,10 @@ STAGE PLANS:
  Stage: Stage-2
    Stats Work
      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key
+          Column Types: string
+          Table: default.dest1

  Stage: Stage-3
    Map Reduce
@@ -630,6 +702,32 @@ STAGE PLANS:
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                  name: default.dest1
+          Select Operator
+            expressions: _col0 (type: string)
+            outputColumnNames: key
+            Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll')
+              mode: hash
+              outputColumnNames: _col0
+              Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                sort order:
+                Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+                value expressions: _col0 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

  Stage: Stage-7
    Conditional Operator
@@ -653,6 +751,10 @@ STAGE PLANS:
  Stage: Stage-2
    Stats Work
      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key
+          Column Types: string
+          Table: default.dest1

  Stage: Stage-3
    Map Reduce
diff --git ql/src/test/results/clientpositive/merge2.q.out ql/src/test/results/clientpositive/merge2.q.out
index 0d781c29ce..ae5f20bca8 100644
--- ql/src/test/results/clientpositive/merge2.q.out
+++ ql/src/test/results/clientpositive/merge2.q.out
@@ -19,10 +19,11 @@ STAGE DEPENDENCIES:
  Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5
  Stage-4
  Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-8
  Stage-3
  Stage-5
  Stage-6 depends on stages: Stage-5
+  Stage-8 depends on stages: Stage-1

STAGE PLANS:
  Stage: Stage-1
@@ -66,6 +67,21 @@ STAGE PLANS:
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                  name: default.test1
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int)
+            outputColumnNames: key, val
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(val, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

  Stage: Stage-7
    Conditional Operator
@@ -89,6 +105,10 @@ STAGE PLANS:
  Stage: Stage-2
    Stats Work
      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, val
+          Column Types: int, int
+          Table: default.test1

  Stage: Stage-3
    Map Reduce
@@ -120,6 +140,28 @@ STAGE PLANS:
          hdfs directory: true
#### A masked pattern was here ####
+  Stage: Stage-8
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
PREHOOK: query: insert overwrite table test1
select key, count(1) from src group by key
PREHOOK: type: QUERY
@@ -528,6 +570,32 @@ STAGE PLANS:
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                  name: default.test1
+          Select Operator
+            expressions: _col0 (type: string)
+            outputColumnNames: key
+            Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll')
+              mode: hash
+              outputColumnNames: _col0
+              Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                sort order:
+                Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+                value expressions: _col0 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

  Stage: Stage-7
    Conditional Operator
@@ -551,6 +619,10 @@ STAGE PLANS:
  Stage: Stage-2
    Stats Work
      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key
+          Column Types: string
+          Table: default.test1

  Stage: Stage-3
    Map Reduce
@@ -630,6 +702,32 @@ STAGE PLANS:
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                  name: default.test1
+          Select Operator
+            expressions: _col0 (type: string)
+            outputColumnNames: key
+            Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll')
+              mode: hash
+              outputColumnNames: _col0
+              Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                sort order:
+                Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+                value expressions: _col0 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

  Stage: Stage-7
    Conditional Operator
@@ -653,6 +751,10 @@ STAGE PLANS:
  Stage: Stage-2
    Stats Work
      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key
+          Column Types: string
+          Table: default.test1

  Stage: Stage-3
    Map Reduce
diff --git ql/src/test/results/clientpositive/merge3.q.out ql/src/test/results/clientpositive/merge3.q.out
index 22a2820dd3..11e704308e 100644
--- ql/src/test/results/clientpositive/merge3.q.out
+++ ql/src/test/results/clientpositive/merge3.q.out
@@ -2421,6 +2421,25 @@ STAGE PLANS:
              TotalFiles: 1
              GatherStats: true
              MultiFileSpray: false
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+            outputColumnNames: key, value, ds
+            Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+              keys: ds (type: string)
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col0 (type: string)
+                null sort order: a
+                sort order: +
+                Map-reduce partition columns: _col0 (type: string)
+                Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+                tag: -1
+                value expressions: _col1 (type: struct), _col2 (type: struct)
+                auto parallelism: false
      Path -> Alias:
#### A masked pattern was here ####
      Path -> Partition:
@@ -2432,7 +2451,7 @@ STAGE PLANS:
          Partition
            partition values:
              ds 2008-04-08
            properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
              bucket_count -1
              column.name.delimiter ,
              columns key,value
@@ -2479,7 +2498,7 @@ STAGE PLANS:
          Partition
            partition values:
              ds 2008-04-09
            properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
              bucket_count -1
              column.name.delimiter ,
              columns key,value
@@ -2521,6 +2540,40 @@ STAGE PLANS:
      Truncated Path -> Alias:
        /merge_src_part/ds=2008-04-08 [merge_src_part]
        /merge_src_part/ds=2008-04-09 [merge_src_part]
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string)
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+#### A masked pattern was here ####
+              NumFilesPerFileSink: 1
+              Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  properties:
+                    columns _col0,_col1,_col2
+                    columns.types struct:struct:string
+                    escape.delim \
+                    hive.serialization.extend.additional.nesting.levels true
+                    serialization.escape.crlf true
+                    serialization.format 1
+                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              TotalFiles: 1
+              GatherStats: false
+              MultiFileSpray: false

  Stage: Stage-7
    Conditional Operator
@@ -2562,6 +2615,11 @@ STAGE PLANS:
    Stats Work
      Basic Stats Work:
#### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.merge_src_part2
+          Is Table Level Stats: false

  Stage: Stage-3
    Map Reduce
@@ -4815,8 +4873,9 @@ STAGE PLANS:
            outputColumnNames: _col0, _col1, _col2
            Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
            Reduce Output Operator
-              null sort order:
-              sort order:
+              key expressions: _col2 (type: string)
+              null sort order: a
+              sort order: +
              Map-reduce partition columns: _col2 (type: string)
              Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
              tag: -1
@@ -4833,7 +4892,7 @@ STAGE PLANS:
          Partition
            partition values:
              ds 2008-04-08
            properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
              bucket_count -1
              column.name.delimiter ,
              columns key,value
@@ -4880,7 +4939,7 @@ STAGE PLANS:
          Partition
            partition values:
              ds 2008-04-09
            properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
              bucket_count -1
              column.name.delimiter ,
              columns key,value
@@ -4957,6 +5016,42 @@ STAGE PLANS:
              TotalFiles: 1
              GatherStats: true
              MultiFileSpray: false
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+            outputColumnNames: key, value, ds
+            Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+              keys: ds (type: string)
+              mode: complete
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string)
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+#### A masked pattern was here ####
+                  NumFilesPerFileSink: 1
+                  Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      properties:
+                        columns _col0,_col1,_col2
+                        columns.types struct:struct:string
+                        escape.delim \
+                        hive.serialization.extend.additional.nesting.levels true
+                        serialization.escape.crlf true
+                        serialization.format 1
+                        serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  TotalFiles: 1
+                  GatherStats: false
+                  MultiFileSpray: false

  Stage: Stage-7
    Conditional Operator
@@ -4998,6 +5093,11 @@ STAGE PLANS:
    Stats Work
      Basic Stats Work:
#### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.merge_src_part2
+          Is Table Level Stats: false

  Stage: Stage-3
    Map Reduce
diff --git ql/src/test/results/clientpositive/merge4.q.out ql/src/test/results/clientpositive/merge4.q.out
index de4c593d53..60d65e8e0b 100644
--- ql/src/test/results/clientpositive/merge4.q.out
+++ ql/src/test/results/clientpositive/merge4.q.out
@@ -41,6 +41,40 @@ STAGE PLANS:
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                  serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2010-08-15' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -67,6 +101,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part Stage: Stage-3 Map Reduce @@ -1162,6 +1200,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2010-08-15' (type: string), '11' (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 
(type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -1188,6 +1260,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part Stage: Stage-3 Map Reduce @@ -2764,10 +2840,11 @@ STAGE DEPENDENCIES: Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-8 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 + Stage-8 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -2788,7 +2865,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 1 Data size: 259 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) TableScan alias: src @@ -2805,7 +2881,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 1 Data size: 259 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) Reduce Operator Tree: Select Operator @@ -2823,6 +2898,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2010-08-15' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1 Data size: 353 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1063 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-7 Conditional Operator @@ -2849,6 +2940,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part Stage: Stage-3 Map Reduce @@ -2880,6 +2975,35 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1 Data size: 1063 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: 
compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1063 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1063 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1063 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: insert overwrite table nzhang_part partition (ds='2010-08-15', hr) select * from ( select key, value, hr from srcpart where ds='2008-04-08' diff --git ql/src/test/results/clientpositive/merge_dynamic_partition.q.out ql/src/test/results/clientpositive/merge_dynamic_partition.q.out index 990e6dfb84..851ac02bac 100644 --- ql/src/test/results/clientpositive/merge_dynamic_partition.q.out +++ ql/src/test/results/clientpositive/merge_dynamic_partition.q.out @@ -77,6 +77,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.merge_dynamic_part + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -94,6 +128,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.merge_dynamic_part PREHOOK: query: insert 
overwrite table merge_dynamic_part partition (ds='2008-04-08', hr) select key, value, hr from srcpart_merge_dp where ds='2008-04-08' PREHOOK: type: QUERY @@ -669,6 +707,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.merge_dynamic_part + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -695,6 +767,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.merge_dynamic_part Stage: Stage-3 Map Reduce @@ -1300,6 +1376,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.merge_dynamic_part + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: 
KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -1326,6 +1436,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.merge_dynamic_part Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/merge_dynamic_partition2.q.out ql/src/test/results/clientpositive/merge_dynamic_partition2.q.out index ccffc89d24..aec543e130 100644 --- ql/src/test/results/clientpositive/merge_dynamic_partition2.q.out +++ ql/src/test/results/clientpositive/merge_dynamic_partition2.q.out @@ -99,6 +99,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.merge_dynamic_part + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 87 Data size: 17415 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 87 Data size: 17415 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 87 Data size: 17415 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 43 Data size: 8607 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 43 Data size: 8607 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 43 Data size: 8607 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -125,6 +159,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: 
default.merge_dynamic_part Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/merge_dynamic_partition3.q.out ql/src/test/results/clientpositive/merge_dynamic_partition3.q.out index 68d7bbe122..076f9e8b7c 100644 --- ql/src/test/results/clientpositive/merge_dynamic_partition3.q.out +++ ql/src/test/results/clientpositive/merge_dynamic_partition3.q.out @@ -159,6 +159,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.merge_dynamic_part + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 174 Data size: 34830 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 174 Data size: 34830 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 174 Data size: 34830 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 87 Data size: 17415 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 87 Data size: 17415 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 87 Data size: 17415 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -185,6 +219,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.merge_dynamic_part Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/merge_dynamic_partition4.q.out ql/src/test/results/clientpositive/merge_dynamic_partition4.q.out index 0da56cd9fc..8b0b0e7672 100644 --- ql/src/test/results/clientpositive/merge_dynamic_partition4.q.out +++ ql/src/test/results/clientpositive/merge_dynamic_partition4.q.out @@ -160,6 +160,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.merge_dynamic_part + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, 
_col3 + Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -186,6 +220,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.merge_dynamic_part Stage: Stage-3 Merge File Operator diff --git ql/src/test/results/clientpositive/merge_dynamic_partition5.q.out ql/src/test/results/clientpositive/merge_dynamic_partition5.q.out index 45237a15b9..f9ac40db58 100644 --- ql/src/test/results/clientpositive/merge_dynamic_partition5.q.out +++ ql/src/test/results/clientpositive/merge_dynamic_partition5.q.out @@ -136,6 +136,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.merge_dynamic_part + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 618 Data size: 5934 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 618 Data size: 5934 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 618 Data size: 5934 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 309 Data size: 2967 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 309 Data size: 2967 Basic stats: COMPLETE Column stats: NONE + 
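
The merge_dynamic_partition hunks all follow one pattern: with column-stats autogather on, the INSERT plan gains a Select Operator that re-projects the written rows under their target column names (key, value, ds, hr), a map-side Group By running compute_stats(col, 'hll') keyed by the partition columns, and a mergepartial reducer whose output feeds the new Column Stats Desc in the Stats Work stage. A minimal HiveQL sketch of the shape, reusing the query already shown for this test (it assumes the srcpart_merge_dp source and merge_dynamic_part target exist as set up by the test itself):

  -- hive.stats.column.autogather makes every INSERT also compute
  -- per-column statistics via compute_stats(..., 'hll').
  SET hive.stats.column.autogather=true;
  EXPLAIN
  INSERT OVERWRITE TABLE merge_dynamic_part PARTITION (ds='2008-04-08', hr)
  SELECT key, value, hr FROM srcpart_merge_dp WHERE ds='2008-04-08';
  -- Per the diff, the plan should now show a Group By Operator with
  -- aggregations compute_stats(key, 'hll'), compute_stats(value, 'hll')
  -- keyed by (ds, hr), plus a Stats Work stage carrying a Column Stats
  -- Desc for key, value against default.merge_dynamic_part.
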
File Output Operator + compressed: false + Statistics: Num rows: 309 Data size: 2967 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -162,6 +196,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.merge_dynamic_part Stage: Stage-3 Merge File Operator diff --git ql/src/test/results/clientpositive/mm_all.q.out ql/src/test/results/clientpositive/mm_all.q.out index 5ad5957c5d..93e3a91e68 100644 --- ql/src/test/results/clientpositive/mm_all.q.out +++ ql/src/test/results/clientpositive/mm_all.q.out @@ -83,6 +83,40 @@ STAGE PLANS: serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.part_mm Write Type: INSERT + Select Operator + expressions: _col0 (type: int), UDFToInteger('455') (type: int) + outputColumnNames: key, key_mm + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll') + keys: key_mm (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -107,6 +141,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key + Column Types: int + Table: default.part_mm Stage: Stage-3 Merge File Operator @@ -1798,7 +1836,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} numFiles 1 numRows 6 rawDataSize 13 @@ -1848,7 +1886,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} numFiles 2 numRows 12 rawDataSize 26 diff --git ql/src/test/results/clientpositive/multi_insert_gby.q.out ql/src/test/results/clientpositive/multi_insert_gby.q.out index 85fd8b7f5e..f45f2ef583 100644 --- ql/src/test/results/clientpositive/multi_insert_gby.q.out +++ ql/src/test/results/clientpositive/multi_insert_gby.q.out @@ -29,9 +29,11 @@ POSTHOOK: type: QUERY STAGE 
DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4, Stage-6 + Stage-4 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-1, Stage-4, Stage-6 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -72,6 +74,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (KEY._col0 > 500) (type: boolean) Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE @@ -93,6 +110,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -107,6 +139,40 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e2 Stage: Stage-1 Move Operator @@ -118,9 +184,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 - 
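
In the multi_insert_gby plans, each INSERT target gets its own stats pipeline, which is why the stage graph is rewired: the bare Stats Work stages are replaced by a Stats Work per target (Stage-3 for e1, Stage-5 for e2, each now carrying a Column Stats Desc) that waits on new map-reduce stages (Stage-4, Stage-6) running compute_stats in mergepartial mode. A hypothetical multi-insert of the same shape as the test (assuming e1 and e2 are (key string, count int) tables and using the filter constants visible in the plan's predicates; the test's exact query text is not reproduced here):

  SET hive.stats.column.autogather=true;
  EXPLAIN
  FROM src
  INSERT OVERWRITE TABLE e1 SELECT key, COUNT(*) WHERE key > 450 GROUP BY key
  INSERT OVERWRITE TABLE e2 SELECT key, COUNT(*) WHERE key > 500 GROUP BY key;
  -- Each branch ends in its own hash-mode compute_stats Group By, a
  -- follow-up MR stage to merge the partial sketches, and a Stats Work
  -- stage publishing the column stats for that target table.
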
Stage: Stage-4 - Stats Work - Basic Stats Work: + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT OVERWRITE TABLE e1 @@ -212,9 +296,11 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-1, Stage-4, Stage-6 + Stage-4 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-0, Stage-4, Stage-6 Stage-0 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -253,6 +339,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (KEY._col0 > 450) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE @@ -274,6 +375,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -288,6 +404,40 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e2 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data 
size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e1 Stage: Stage-0 Move Operator @@ -299,9 +449,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 - Stage: Stage-4 - Stats Work - Basic Stats Work: + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT OVERWRITE TABLE e1 diff --git ql/src/test/results/clientpositive/multi_insert_gby2.q.out ql/src/test/results/clientpositive/multi_insert_gby2.q.out index ceb694052f..d7e0c3c2da 100644 --- ql/src/test/results/clientpositive/multi_insert_gby2.q.out +++ ql/src/test/results/clientpositive/multi_insert_gby2.q.out @@ -70,6 +70,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: count + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(count, 'hll') + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Group By Operator aggregations: percentile_approx(VALUE._col0, 0.5) mode: complete @@ -83,6 +103,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 + Select Operator + expressions: _col0 (type: double) + 
outputColumnNames: percentile + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(percentile, 'hll') + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -97,6 +137,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: count + Column Types: int + Table: default.e1 Stage: Stage-1 Move Operator @@ -111,6 +155,10 @@ STAGE PLANS: Stage: Stage-4 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: percentile + Column Types: double + Table: default.e2 PREHOOK: query: FROM (select key, cast(key as double) as value from src order by key) a INSERT OVERWRITE TABLE e1 diff --git ql/src/test/results/clientpositive/multi_insert_gby3.q.out ql/src/test/results/clientpositive/multi_insert_gby3.q.out index d004e644e9..8f2081db75 100644 --- ql/src/test/results/clientpositive/multi_insert_gby3.q.out +++ ql/src/test/results/clientpositive/multi_insert_gby3.q.out @@ -39,9 +39,11 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4, Stage-6 + Stage-4 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-1, Stage-4, Stage-6 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -81,6 +83,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: string), _col1 (type: double) + outputColumnNames: key, keyd + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(keyd, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1:0._col0 (type: string) @@ -99,6 +116,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 + Select Operator + expressions: _col0 (type: string), _col1 (type: double), _col2 (type: string) + outputColumnNames: key, keyd, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(keyd, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, 
_col1, _col2 + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -113,6 +145,40 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, keyd + Column Types: string, double + Table: default.e1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, keyd, value + Column Types: string, double, string + Table: default.e2 Stage: Stage-1 Move Operator @@ -124,9 +190,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 - Stage: Stage-4 - Stats Work - Basic Stats Work: + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: explain FROM (select key, cast(key as double) as keyD, value from src order by key) a @@ -145,9 +229,11 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4, Stage-6 + Stage-4 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-1, Stage-4, Stage-6 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -187,6 +273,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: string), _col1 (type: double) + outputColumnNames: key, keyd + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + 
Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(keyd, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1:0._col0 (type: string) @@ -205,6 +306,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 + Select Operator + expressions: _col0 (type: string), _col1 (type: double), _col2 (type: string) + outputColumnNames: key, keyd, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(keyd, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -219,6 +335,40 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, keyd + Column Types: string, double + Table: default.e1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, keyd, value + Column Types: string, double, string + Table: default.e2 Stage: Stage-1 Move Operator @@ -230,9 +380,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 - Stage: Stage-4 - Stats Work - Basic Stats Work: + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: 
COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM (select key, cast(key as double) as keyD, value from src order by key) a INSERT OVERWRITE TABLE e1 @@ -1565,10 +1733,12 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1, Stage-4, Stage-7 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -1627,6 +1797,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: string), _col1 (type: double) + outputColumnNames: key, keyd + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(keyd, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -1641,12 +1826,46 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, keyd + Column Types: string, double + Table: default.e1 Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, keyd + Column Types: string, double + Table: default.e3 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string) @@ -1670,6 +1889,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e3 + Select Operator + expressions: _col0 (type: string), _col1 (type: double) + outputColumnNames: key, keyd + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 
'hll'), compute_stats(keyd, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -1681,9 +1915,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e3 - Stage: Stage-5 - Stats Work - Basic Stats Work: + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: explain FROM (select key, cast(key as double) as keyD, value from src order by key) a @@ -1706,12 +1958,15 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-3 is a root stage Stage-0 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-0, Stage-5, Stage-7, Stage-10 + Stage-5 depends on stages: Stage-3 + Stage-6 depends on stages: Stage-1, Stage-5, Stage-7, Stage-10 + Stage-9 depends on stages: Stage-2, Stage-5, Stage-7, Stage-10 Stage-1 depends on stages: Stage-3 - Stage-5 depends on stages: Stage-1 - Stage-6 depends on stages: Stage-3 - Stage-2 depends on stages: Stage-6 - Stage-7 depends on stages: Stage-2 + Stage-7 depends on stages: Stage-3 + Stage-8 depends on stages: Stage-3 + Stage-2 depends on stages: Stage-8 + Stage-10 depends on stages: Stage-8 STAGE PLANS: Stage: Stage-3 @@ -1763,6 +2018,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: string), _col1 (type: double) + outputColumnNames: key, keyd + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(keyd, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1:0._col0 (type: string) @@ -1781,6 +2051,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 + Select Operator + expressions: _col0 (type: string), _col1 (type: double), _col2 (type: string) + 
+              outputColumnNames: key, keyd, value
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(keyd, 'hll'), compute_stats(value, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -1795,6 +2080,48 @@ STAGE PLANS:
   Stage: Stage-4
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, keyd
+          Column Types: string, double
+          Table: default.e1
+
+  Stage: Stage-5
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-6
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, keyd, value
+          Column Types: string, double, string
+          Table: default.e2
+
+  Stage: Stage-9
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, keyd
+          Column Types: string, double
+          Table: default.e3
 
   Stage: Stage-1
     Move Operator
@@ -1806,11 +2133,29 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.e2
 
-  Stage: Stage-5
-    Stats Work
-      Basic Stats Work:
+  Stage: Stage-7
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
-  Stage: Stage-6
+  Stage: Stage-8
     Map Reduce
       Map Operator Tree:
           TableScan
@@ -1838,6 +2183,21 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.e3
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: double)
+              outputColumnNames: key, keyd
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(keyd, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-2
     Move Operator
@@ -1849,7 +2209,25 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.e3
 
-  Stage: Stage-7
-    Stats Work
-      Basic Stats Work:
+  Stage: Stage-10
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
diff --git ql/src/test/results/clientpositive/multi_insert_gby4.q.out ql/src/test/results/clientpositive/multi_insert_gby4.q.out
index 2b464d36a7..515a032fe3 100644
--- ql/src/test/results/clientpositive/multi_insert_gby4.q.out
+++ ql/src/test/results/clientpositive/multi_insert_gby4.q.out
@@ -43,11 +43,14 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-3 is a root stage
   Stage-0 depends on stages: Stage-3
-  Stage-4 depends on stages: Stage-0
+  Stage-4 depends on stages: Stage-0, Stage-5, Stage-7, Stage-9
+  Stage-5 depends on stages: Stage-3
+  Stage-6 depends on stages: Stage-1, Stage-5, Stage-7, Stage-9
+  Stage-8 depends on stages: Stage-2, Stage-5, Stage-7, Stage-9
   Stage-1 depends on stages: Stage-3
-  Stage-5 depends on stages: Stage-1
+  Stage-7 depends on stages: Stage-3
   Stage-2 depends on stages: Stage-3
-  Stage-6 depends on stages: Stage-2
+  Stage-9 depends on stages: Stage-3
 
 STAGE PLANS:
   Stage: Stage-3
@@ -92,6 +95,21 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.e1
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: int)
+              outputColumnNames: key, count
+              Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
             Filter Operator
               predicate: (KEY._col0 > 500) (type: boolean)
              Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
@@ -113,6 +131,21 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.e2
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: int)
+              outputColumnNames: key, count
+              Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
             Filter Operator
               predicate: (KEY._col0 > 490) (type: boolean)
               Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
@@ -134,6 +167,21 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.e3
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: int)
+              outputColumnNames: key, count
+              Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -148,6 +196,48 @@ STAGE PLANS:
   Stage: Stage-4
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, count
+          Column Types: string, int
+          Table: default.e1
+
+  Stage: Stage-5
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-6
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, count
+          Column Types: string, int
+          Table: default.e2
+
+  Stage: Stage-8
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, count
+          Column Types: string, int
+          Table: default.e3
 
   Stage: Stage-1
     Move Operator
@@ -159,9 +249,27 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.e2
 
-  Stage: Stage-5
-    Stats Work
-      Basic Stats Work:
+  Stage: Stage-7
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-2
     Move Operator
@@ -173,9 +281,27 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.e3
 
-  Stage: Stage-6
-    Stats Work
-      Basic Stats Work:
+  Stage: Stage-9
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: FROM (SELECT key, value FROM src) a
 INSERT OVERWRITE TABLE e1
diff --git ql/src/test/results/clientpositive/multi_insert_mixed.q.out ql/src/test/results/clientpositive/multi_insert_mixed.q.out
index 05c030e0e0..ef5cfbc8d8 100644
--- ql/src/test/results/clientpositive/multi_insert_mixed.q.out
+++ ql/src/test/results/clientpositive/multi_insert_mixed.q.out
@@ -38,13 +38,14 @@ STAGE DEPENDENCIES:
   Stage-3 is a root stage
   Stage-4 depends on stages: Stage-3
   Stage-0 depends on stages: Stage-4
-  Stage-5 depends on stages: Stage-0
+  Stage-5 depends on stages: Stage-0, Stage-10
   Stage-6 depends on stages: Stage-3
   Stage-7 depends on stages: Stage-6
   Stage-1 depends on stages: Stage-7
-  Stage-8 depends on stages: Stage-1
+  Stage-8 depends on stages: Stage-1, Stage-10
   Stage-2 depends on stages: Stage-3
-  Stage-9 depends on stages: Stage-2
+  Stage-9 depends on stages: Stage-2, Stage-10
+  Stage-10 depends on stages: Stage-3
 
 STAGE PLANS:
   Stage: Stage-3
@@ -100,6 +101,21 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.src_multi3
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
       Reduce Operator Tree:
         Group By Operator
           aggregations: count(VALUE._col0)
@@ -136,6 +152,26 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.src_multi1
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                mode: complete
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: struct), _col1 (type: struct)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Move Operator
@@ -150,6 +186,10 @@ STAGE PLANS:
   Stage: Stage-5
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi1
 
   Stage: Stage-6
     Map Reduce
@@ -197,6 +237,26 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.src_multi2
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                mode: complete
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: struct), _col1 (type: struct)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-1
     Move Operator
@@ -211,6 +271,10 @@ STAGE PLANS:
   Stage: Stage-8
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi2
 
   Stage: Stage-2
     Move Operator
@@ -225,6 +289,32 @@ STAGE PLANS:
   Stage: Stage-9
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi3
+
+  Stage: Stage-10
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: from src
 insert overwrite table src_multi1 select key, count(1) group by key order by key
diff --git ql/src/test/results/clientpositive/multi_insert_move_tasks_share_dependencies.q.out ql/src/test/results/clientpositive/multi_insert_move_tasks_share_dependencies.q.out
index 281a05aeb8..257310374f 100644
--- ql/src/test/results/clientpositive/multi_insert_move_tasks_share_dependencies.q.out
+++ ql/src/test/results/clientpositive/multi_insert_move_tasks_share_dependencies.q.out
@@ -28,9 +28,10 @@ STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-4 depends on stages: Stage-2
   Stage-0 depends on stages: Stage-4
-  Stage-3 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-0, Stage-6
   Stage-1 depends on stages: Stage-4
-  Stage-5 depends on stages: Stage-1
+  Stage-5 depends on stages: Stage-1, Stage-6
+  Stage-6 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-2
@@ -54,6 +55,19 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.src_multi1
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order:
+                  Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: struct), _col1 (type: struct)
             Filter Operator
               predicate: ((key < 20) and (key > 10)) (type: boolean)
               Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
@@ -69,6 +83,34 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.src_multi2
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-4
     Dependency Collection
@@ -86,6 +128,10 @@ STAGE PLANS:
   Stage: Stage-3
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi1
 
   Stage: Stage-1
     Move Operator
@@ -100,6 +146,32 @@ STAGE PLANS:
   Stage: Stage-5
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi2
+
+  Stage: Stage-6
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: from src
 insert overwrite table src_multi1 select * where key < 10
@@ -168,19 +240,15 @@ STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-9 depends on stages: Stage-2 , consists of Stage-6, Stage-5, Stage-7
   Stage-6
-  Stage-4 depends on stages: Stage-6, Stage-5, Stage-8, Stage-12, Stage-11, Stage-14
+  Stage-4 depends on stages: Stage-6, Stage-5, Stage-8, Stage-2
   Stage-0 depends on stages: Stage-4
-  Stage-3 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-0, Stage-11
   Stage-1 depends on stages: Stage-4
-  Stage-10 depends on stages: Stage-1
+  Stage-10 depends on stages: Stage-1, Stage-11
   Stage-5
   Stage-7
   Stage-8 depends on stages: Stage-7
-  Stage-15 depends on stages: Stage-2 , consists of Stage-12, Stage-11, Stage-13
-  Stage-12
-  Stage-11
-  Stage-13
-  Stage-14 depends on stages: Stage-13
+  Stage-11 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-2
@@ -204,6 +272,19 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.src_multi1
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order:
+                  Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: struct), _col1 (type: struct)
             Filter Operator
               predicate: ((key < 20) and (key > 10)) (type: boolean)
               Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
@@ -219,6 +300,34 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.src_multi2
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-9
     Conditional Operator
@@ -245,6 +354,10 @@ STAGE PLANS:
   Stage: Stage-3
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi1
 
   Stage: Stage-1
     Move Operator
@@ -259,6 +372,10 @@ STAGE PLANS:
   Stage: Stage-10
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi2
 
   Stage: Stage-5
     Map Reduce
@@ -290,44 +407,27 @@ STAGE PLANS:
           hdfs directory: true
 #### A masked pattern was here ####
 
-  Stage: Stage-15
-    Conditional Operator
-
-  Stage: Stage-12
-    Move Operator
-      files:
-          hdfs directory: true
-#### A masked pattern was here ####
-
   Stage: Stage-11
     Map Reduce
       Map Operator Tree:
           TableScan
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                  name: default.src_multi2
-
-  Stage: Stage-13
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                  name: default.src_multi2
-
-  Stage: Stage-14
-    Move Operator
-      files:
-          hdfs directory: true
-#### A masked pattern was here ####
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: from src
 insert overwrite table src_multi1 select * where key < 10
@@ -394,11 +494,17 @@ insert overwrite table src_multi2 select * where key > 10 and key < 20
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
-  Stage-4 depends on stages: Stage-2
+  Stage-4 depends on stages: Stage-2, Stage-7, Stage-6, Stage-9
   Stage-0 depends on stages: Stage-4
-  Stage-3 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-0, Stage-11
   Stage-1 depends on stages: Stage-4
-  Stage-5 depends on stages: Stage-1
+  Stage-5 depends on stages: Stage-1, Stage-11
+  Stage-10 depends on stages: Stage-2 , consists of Stage-7, Stage-6, Stage-8
+  Stage-7
+  Stage-6
+  Stage-8
+  Stage-9 depends on stages: Stage-8
+  Stage-11 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-2
@@ -422,6 +528,19 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.src_multi1
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order:
+                  Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: struct), _col1 (type: struct)
             Filter Operator
              predicate: ((key < 20) and (key > 10)) (type: boolean)
               Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
@@ -437,6 +556,34 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.src_multi2
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-4
     Dependency Collection
@@ -454,6 +601,10 @@ STAGE PLANS:
   Stage: Stage-3
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi1
 
   Stage: Stage-1
     Move Operator
@@ -468,6 +619,71 @@ STAGE PLANS:
   Stage: Stage-5
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi2
+
+  Stage: Stage-10
+    Conditional Operator
+
+  Stage: Stage-7
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
+  Stage: Stage-6
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  name: default.src_multi2
+
+  Stage: Stage-8
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  name: default.src_multi2
+
+  Stage: Stage-9
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
+  Stage: Stage-11
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: from src
 insert overwrite table src_multi1 select * where key < 10
@@ -538,9 +754,9 @@ STAGE DEPENDENCIES:
   Stage-6
   Stage-4 depends on stages: Stage-6, Stage-5, Stage-8, Stage-12, Stage-11, Stage-14
   Stage-0 depends on stages: Stage-4
-  Stage-3 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-0, Stage-16
   Stage-1 depends on stages: Stage-4
-  Stage-10 depends on stages: Stage-1
+  Stage-10 depends on stages: Stage-1, Stage-16
   Stage-5
   Stage-7
   Stage-8 depends on stages: Stage-7
@@ -549,6 +765,7 @@ STAGE DEPENDENCIES:
   Stage-11
   Stage-13
   Stage-14 depends on stages: Stage-13
+  Stage-16 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-2
@@ -572,6 +789,19 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.src_multi1
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order:
+                  Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: struct), _col1 (type: struct)
             Filter Operator
               predicate: ((key < 20) and (key > 10)) (type: boolean)
               Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
@@ -587,6 +817,34 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.src_multi2
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-9
     Conditional Operator
@@ -613,6 +871,10 @@ STAGE PLANS:
   Stage: Stage-3
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi1
 
   Stage: Stage-1
     Move Operator
@@ -627,6 +889,10 @@ STAGE PLANS:
   Stage: Stage-10
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi2
 
   Stage: Stage-5
     Map Reduce
@@ -697,6 +963,28 @@ STAGE PLANS:
           hdfs directory: true
 #### A masked pattern was here ####
 
+  Stage: Stage-16
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
 PREHOOK: query: from src
 insert overwrite table src_multi1 select * where key < 10
 insert overwrite table src_multi2 select * where key > 10 and key < 20
@@ -764,9 +1052,11 @@ STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-4 depends on stages: Stage-2
   Stage-0 depends on stages: Stage-4
-  Stage-3 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-0, Stage-5, Stage-7
   Stage-1 depends on stages: Stage-4
-  Stage-5 depends on stages: Stage-1
+  Stage-6 depends on stages: Stage-1, Stage-5, Stage-7
+  Stage-5 depends on stages: Stage-2
+  Stage-7 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-2
@@ -802,6 +1092,21 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.src_multi1
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
             Filter Operator
               predicate: ((KEY._col0 < 20) and (KEY._col0 > 10)) (type: boolean)
               Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE
@@ -818,6 +1123,21 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.src_multi2
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-4
     Dependency Collection
@@ -835,6 +1155,10 @@ STAGE PLANS:
   Stage: Stage-3
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi1
 
   Stage: Stage-1
     Move Operator
@@ -846,9 +1170,57 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.src_multi2
 
-  Stage: Stage-5
+  Stage: Stage-6
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi2
+
+  Stage: Stage-5
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-7
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: from src
 insert overwrite table src_multi1 select * where key < 10 group by key, value
@@ -910,19 +1282,21 @@ STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-9 depends on stages: Stage-2 , consists of Stage-6, Stage-5, Stage-7
   Stage-6
-  Stage-4 depends on stages: Stage-6, Stage-5, Stage-8, Stage-12, Stage-11, Stage-14
+  Stage-4 depends on stages: Stage-6, Stage-5, Stage-8, Stage-13, Stage-12, Stage-15
   Stage-0 depends on stages: Stage-4
-  Stage-3 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-0, Stage-10, Stage-17
   Stage-1 depends on stages: Stage-4
-  Stage-10 depends on stages: Stage-1
+  Stage-11 depends on stages: Stage-1, Stage-10, Stage-17
   Stage-5
   Stage-7
   Stage-8 depends on stages: Stage-7
-  Stage-15 depends on stages: Stage-2 , consists of Stage-12, Stage-11, Stage-13
-  Stage-12
-  Stage-11
+  Stage-10 depends on stages: Stage-2
+  Stage-16 depends on stages: Stage-2 , consists of Stage-13, Stage-12, Stage-14
   Stage-13
-  Stage-14 depends on stages: Stage-13
+  Stage-12
+  Stage-14
+  Stage-15 depends on stages: Stage-14
+  Stage-17 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-2
@@ -958,6 +1332,21 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.src_multi1
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
             Filter Operator
               predicate: ((KEY._col0 < 20) and (KEY._col0 > 10)) (type: boolean)
               Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE
@@ -974,6 +1363,21 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.src_multi2
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-9
     Conditional Operator
@@ -1000,6 +1404,10 @@ STAGE PLANS:
   Stage: Stage-3
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi1
 
   Stage: Stage-1
     Move Operator
@@ -1011,9 +1419,13 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.src_multi2
 
-  Stage: Stage-10
+  Stage: Stage-11
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi2
 
   Stage: Stage-5
     Map Reduce
@@ -1045,16 +1457,38 @@ STAGE PLANS:
           hdfs directory: true
 #### A masked pattern was here ####
 
-  Stage: Stage-15
+  Stage: Stage-10
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-16
     Conditional Operator
 
-  Stage: Stage-12
+  Stage: Stage-13
     Move Operator
       files:
          hdfs directory: true
 #### A masked pattern was here ####
 
-  Stage: Stage-11
+  Stage: Stage-12
     Map Reduce
       Map Operator Tree:
          TableScan
@@ -1066,7 +1500,7 @@ STAGE PLANS:
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.src_multi2
 
-  Stage: Stage-13
+  Stage: Stage-14
     Map Reduce
       Map Operator Tree:
           TableScan
@@ -1078,12 +1512,34 @@ STAGE PLANS:
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.src_multi2
 
-  Stage: Stage-14
+  Stage: Stage-15
     Move Operator
       files:
           hdfs directory: true
 #### A masked pattern was here ####
 
+  Stage: Stage-17
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
 PREHOOK: query: from src
 insert overwrite table src_multi1 select * where key < 10 group by key, value
 insert overwrite table src_multi2 select * where key > 10 and key < 20 group by key, value
@@ -1144,9 +1600,11 @@ STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-4 depends on stages: Stage-2
   Stage-0 depends on stages: Stage-4
-  Stage-3 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-0, Stage-5, Stage-7
   Stage-1 depends on stages: Stage-4
-  Stage-5 depends on stages: Stage-1
+  Stage-6 depends on stages: Stage-1, Stage-5, Stage-7
+  Stage-5 depends on stages: Stage-2
+  Stage-7 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-2
@@ -1182,6 +1640,21 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.src_multi1
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
             Filter Operator
               predicate: ((KEY._col0 < 20) and (KEY._col0 > 10)) (type: boolean)
               Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE
@@ -1198,10 +1671,25 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.src_multi2
-
-  Stage: Stage-4
-    Dependency Collection
-
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-4
+    Dependency Collection
+
   Stage: Stage-0
     Move Operator
       tables:
@@ -1215,6 +1703,10 @@ STAGE PLANS:
   Stage: Stage-3
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi1
 
   Stage: Stage-1
     Move Operator
@@ -1226,9 +1718,57 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.src_multi2
 
-  Stage: Stage-5
+  Stage: Stage-6
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi2
+
+  Stage: Stage-5
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-7
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: from src
 insert overwrite table src_multi1 select * where key < 10 group by key, value
@@ -1290,19 +1830,21 @@ STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-9 depends on stages: Stage-2 , consists of Stage-6, Stage-5, Stage-7
   Stage-6
-  Stage-4 depends on stages: Stage-6, Stage-5, Stage-8, Stage-12, Stage-11, Stage-14
+  Stage-4 depends on stages: Stage-6, Stage-5, Stage-8, Stage-13, Stage-12, Stage-15
   Stage-0 depends on stages: Stage-4
-  Stage-3 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-0, Stage-10, Stage-17
   Stage-1 depends on stages: Stage-4
-  Stage-10 depends on stages: Stage-1
+  Stage-11 depends on stages: Stage-1, Stage-10, Stage-17
   Stage-5
   Stage-7
   Stage-8 depends on stages: Stage-7
-  Stage-15 depends on stages: Stage-2 , consists of Stage-12, Stage-11, Stage-13
-  Stage-12
-  Stage-11
+  Stage-10 depends on stages: Stage-2
+  Stage-16 depends on stages: Stage-2 , consists of Stage-13, Stage-12, Stage-14
   Stage-13
-  Stage-14 depends on stages: Stage-13
+  Stage-12
+  Stage-14
+  Stage-15 depends on stages: Stage-14
+  Stage-17 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-2
@@ -1338,6 +1880,21 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.src_multi1
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
             Filter Operator
               predicate: ((KEY._col0 < 20) and (KEY._col0 > 10)) (type: boolean)
               Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE
@@ -1354,6 +1911,21 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.src_multi2
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-9
     Conditional Operator
@@ -1380,6 +1952,10 @@ STAGE PLANS:
   Stage: Stage-3
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi1
 
   Stage: Stage-1
     Move Operator
@@ -1391,9 +1967,13 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.src_multi2
 
-  Stage: Stage-10
+  Stage: Stage-11
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi2
 
   Stage: Stage-5
     Map Reduce
@@ -1425,16 +2005,38 @@ STAGE PLANS:
           hdfs directory: true
 #### A masked pattern was here ####
 
-  Stage: Stage-15
+  Stage: Stage-10
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-16
     Conditional Operator
 
-  Stage: Stage-12
+  Stage: Stage-13
     Move Operator
       files:
           hdfs directory: true
 #### A masked pattern was here ####
 
-  Stage: Stage-11
+  Stage: Stage-12
     Map Reduce
       Map Operator Tree:
          TableScan
@@ -1446,7 +2048,7 @@ STAGE PLANS:
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.src_multi2
 
-  Stage: Stage-13
+  Stage: Stage-14
     Map Reduce
       Map Operator Tree:
           TableScan
@@ -1458,12 +2060,34 @@ STAGE PLANS:
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.src_multi2
 
-  Stage: Stage-14
+  Stage: Stage-15
     Move Operator
       files:
          hdfs directory: true
 #### A masked pattern was here ####
 
+  Stage: Stage-17
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
 PREHOOK: query: from src
 insert overwrite table src_multi1 select * where key < 10 group by key, value
 insert overwrite table src_multi2 select * where key > 10 and key < 20 group by key, value
@@ -1524,9 +2148,10 @@ STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-4 depends on stages: Stage-2
   Stage-0 depends on stages: Stage-4
-  Stage-3 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-0, Stage-6
   Stage-1 depends on stages: Stage-4
-  Stage-5 depends on stages: Stage-1
+  Stage-5 depends on stages: Stage-1, Stage-6
+  Stage-6 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-2
@@ -1552,6 +2177,19 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.src_multi1
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order:
+                  Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: struct), _col1 (type: struct)
             Filter Operator
              predicate: ((_col0 < 20) and (_col0 > 10)) (type: boolean)
              Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
@@ -1563,6 +2201,21 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.src_multi2
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
           TableScan
             alias: src
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -1583,6 +2236,19 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.src_multi1
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order:
+                  Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: struct), _col1 (type: struct)
             Filter Operator
              predicate: ((_col0 < 20) and (_col0 > 10)) (type: boolean)
              Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
@@ -1594,6 +2260,34 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.src_multi2
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics:
Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-4 Dependency Collection @@ -1611,6 +2305,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 Stage: Stage-1 Move Operator @@ -1625,6 +2323,32 @@ STAGE PLANS: Stage: Stage-5 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: from (select * from src union all select * from src) s insert overwrite table src_multi1 select * where key < 10 @@ -1712,19 +2436,15 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-9 depends on stages: Stage-2 , consists of Stage-6, Stage-5, Stage-7 Stage-6 - Stage-4 depends on stages: Stage-6, Stage-5, Stage-8, Stage-12, Stage-11, Stage-14 + Stage-4 depends on stages: Stage-6, Stage-5, Stage-8, Stage-2 Stage-0 depends on stages: Stage-4 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-11 Stage-1 depends on stages: Stage-4 - Stage-10 depends on stages: Stage-1 + Stage-10 depends on stages: Stage-1, Stage-11 Stage-5 Stage-7 Stage-8 depends on stages: Stage-7 - Stage-15 depends on stages: Stage-2 , consists of Stage-12, Stage-11, Stage-13 - Stage-12 - Stage-11 - Stage-13 - Stage-14 depends on stages: Stage-13 + Stage-11 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -1750,6 +2470,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 < 20) and (_col0 > 10)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE @@ -1761,6 +2494,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -1781,6 +2529,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 < 20) and (_col0 > 10)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE @@ -1792,6 +2553,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-9 Conditional Operator @@ -1818,6 +2607,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 Stage: Stage-1 Move Operator @@ -1832,6 +2625,10 @@ STAGE PLANS: Stage: Stage-10 Stats 
Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 Stage: Stage-5 Map Reduce @@ -1863,44 +2660,27 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-15 - Conditional Operator - - Stage: Stage-12 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-11 Map Reduce Map Operator Tree: TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 - - Stage: Stage-13 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 - - Stage: Stage-14 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: from (select * from src union all select * from src) s insert overwrite table src_multi1 select * where key < 10 @@ -1986,11 +2766,17 @@ insert overwrite table src_multi2 select * where key > 10 and key < 20 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-4 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-2, Stage-7, Stage-6, Stage-9 Stage-0 depends on stages: Stage-4 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-11 Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-1, Stage-11 + Stage-10 depends on stages: Stage-2 , consists of Stage-7, Stage-6, Stage-8 + Stage-7 + Stage-6 + Stage-8 + Stage-9 depends on stages: Stage-8 + Stage-11 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -2016,6 +2802,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 
(type: struct) Filter Operator predicate: ((_col0 < 20) and (_col0 > 10)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE @@ -2027,6 +2826,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -2047,6 +2861,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 < 20) and (_col0 > 10)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE @@ -2058,6 +2885,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-4 
Dependency Collection @@ -2075,6 +2930,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 Stage: Stage-1 Move Operator @@ -2089,6 +2948,71 @@ STAGE PLANS: Stage: Stage-5 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + + Stage: Stage-10 + Conditional Operator + + Stage: Stage-7 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 + + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 + + Stage: Stage-9 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: from (select * from src union all select * from src) s insert overwrite table src_multi1 select * where key < 10 @@ -2178,9 +3102,9 @@ STAGE DEPENDENCIES: Stage-6 Stage-4 depends on stages: Stage-6, Stage-5, Stage-8, Stage-12, Stage-11, Stage-14 Stage-0 depends on stages: Stage-4 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-16 Stage-1 depends on stages: Stage-4 - Stage-10 depends on stages: Stage-1 + Stage-10 depends on stages: Stage-1, Stage-16 Stage-5 Stage-7 Stage-8 depends on stages: Stage-7 @@ -2189,6 +3113,7 @@ STAGE DEPENDENCIES: Stage-11 Stage-13 Stage-14 depends on stages: Stage-13 + Stage-16 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -2214,6 +3139,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: 
Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 < 20) and (_col0 > 10)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE @@ -2225,6 +3163,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -2245,6 +3198,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 < 20) and (_col0 > 10)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE @@ -2256,6 +3222,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-9 Conditional Operator @@ -2282,6 +3276,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 Stage: Stage-1 Move Operator @@ -2296,6 +3294,10 @@ STAGE PLANS: Stage: Stage-10 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 Stage: Stage-5 Map Reduce @@ -2366,6 +3368,28 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### + Stage: Stage-16 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: from (select * from src union all select * from src) s insert overwrite table src_multi1 select * where key < 10 insert overwrite table src_multi2 select * where key > 10 and key < 20 @@ -3297,14 +4321,16 @@ insert overwrite table src_multi2 select * where key > 10 and key < 20 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-4 is a root stage - Stage-6 depends on stages: Stage-4, Stage-8 + Stage-6 depends on stages: Stage-4, Stage-9, Stage-10 Stage-0 depends on stages: Stage-6 - Stage-5 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-0, Stage-8, Stage-2, Stage-3 Stage-1 depends on stages: Stage-6 - Stage-7 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-4 + Stage-7 depends on stages: Stage-1, Stage-8, Stage-2, Stage-3 Stage-8 depends on stages: Stage-4 - Stage-3 depends on stages: Stage-8 + Stage-9 depends on stages: Stage-4 + Stage-2 depends on stages: Stage-9 + Stage-10 depends on stages: Stage-9 + Stage-3 depends on stages: Stage-10 STAGE PLANS: Stage: Stage-4 @@ -3328,6 +4354,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key < 20) and (key > 10)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -3343,46 +4382,43 @@ STAGE PLANS: output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (((key > 10) and (key < 20)) or (key < 10)) (type: boolean) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Forward - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (KEY._col0 < 10) (type: boolean) - Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Filter Operator - predicate: ((KEY._col0 < 20) and (KEY._col0 > 10)) (type: boolean) - Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-6 Dependency Collection @@ -3400,6 +4436,10 @@ STAGE PLANS: Stage: Stage-5 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 Stage: Stage-1 Move Operator @@ -3414,6 +4454,74 @@ STAGE PLANS: Stage: Stage-7 Stats Work 
Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-9 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Forward + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (KEY._col0 < 10) (type: boolean) + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Filter Operator + predicate: ((KEY._col0 < 20) and (KEY._col0 > 10)) (type: boolean) + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Move Operator @@ -3421,7 +4529,7 @@ STAGE PLANS: hdfs directory: false #### A masked pattern was here #### - Stage: Stage-8 + Stage: Stage-10 Map Reduce Map Operator Tree: TableScan @@ -3523,14 +4631,21 @@ insert overwrite table src_multi2 select * where key > 10 and key < 20 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-4 is a root stage - Stage-6 depends on stages: Stage-4, Stage-8 + Stage-6 depends on stages: Stage-4, Stage-9, Stage-8, Stage-11, Stage-14, Stage-15 Stage-0 depends on stages: Stage-6 - Stage-5 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-0, Stage-13, Stage-2, Stage-3 Stage-1 depends on stages: Stage-6 - Stage-7 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-4 - Stage-8 depends on stages: Stage-4 - Stage-3 depends on stages: Stage-8 + Stage-7 depends on stages: Stage-1, 
Stage-13, Stage-2, Stage-3 + Stage-12 depends on stages: Stage-4 , consists of Stage-9, Stage-8, Stage-10 + Stage-9 + Stage-8 + Stage-10 + Stage-11 depends on stages: Stage-10 + Stage-13 depends on stages: Stage-4 + Stage-14 depends on stages: Stage-4 + Stage-2 depends on stages: Stage-14 + Stage-15 depends on stages: Stage-14 + Stage-3 depends on stages: Stage-15 STAGE PLANS: Stage: Stage-4 @@ -3554,6 +4669,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key < 20) and (key > 10)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -3569,46 +4697,43 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (((key > 10) and (key < 20)) or (key < 10)) (type: boolean) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Forward - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (KEY._col0 < 10) (type: boolean) - Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Filter Operator - predicate: ((KEY._col0 < 20) and (KEY._col0 > 10)) (type: boolean) - Statistics: Num rows: 24 Data size: 254 Basic 
stats: COMPLETE Column stats: NONE - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-6 Dependency Collection @@ -3626,6 +4751,10 @@ STAGE PLANS: Stage: Stage-5 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 Stage: Stage-1 Move Operator @@ -3640,6 +4769,113 @@ STAGE PLANS: Stage: Stage-7 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + + Stage: Stage-12 + Conditional Operator + + Stage: Stage-9 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 + + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 + + Stage: Stage-11 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-13 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-14 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 221 Data size: 2347 Basic stats: 
COMPLETE Column stats: NONE + Reduce Operator Tree: + Forward + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (KEY._col0 < 10) (type: boolean) + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Filter Operator + predicate: ((KEY._col0 < 20) and (KEY._col0 > 10)) (type: boolean) + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Move Operator @@ -3647,7 +4883,7 @@ STAGE PLANS: hdfs directory: false #### A masked pattern was here #### - Stage: Stage-8 + Stage: Stage-15 Map Reduce Map Operator Tree: TableScan @@ -3751,22 +4987,19 @@ STAGE DEPENDENCIES: Stage-4 is a root stage Stage-11 depends on stages: Stage-4 , consists of Stage-8, Stage-7, Stage-9 Stage-8 - Stage-6 depends on stages: Stage-8, Stage-7, Stage-10, Stage-14, Stage-13, Stage-16, Stage-4, Stage-18 + Stage-6 depends on stages: Stage-8, Stage-7, Stage-10, Stage-4, Stage-14, Stage-15 Stage-0 depends on stages: Stage-6 - Stage-5 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-0, Stage-13, Stage-2, Stage-3 Stage-1 depends on stages: Stage-6 - Stage-12 depends on stages: Stage-1 + Stage-12 depends on stages: Stage-1, Stage-13, Stage-2, Stage-3 Stage-7 Stage-9 Stage-10 depends on stages: Stage-9 - Stage-17 depends on stages: Stage-4 , consists of Stage-14, Stage-13, Stage-15 - Stage-14 - Stage-13 - Stage-15 - Stage-16 depends on stages: Stage-15 - Stage-2 depends on stages: Stage-4 - Stage-18 depends on stages: Stage-4 - Stage-3 depends on stages: Stage-18 + Stage-13 depends on stages: Stage-4 + Stage-14 depends on stages: Stage-4 + Stage-2 depends on stages: Stage-14 + Stage-15 depends on stages: Stage-14 + Stage-3 depends on stages: Stage-15 STAGE PLANS: Stage: Stage-4 @@ -3790,6 +5023,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE 
+ value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key < 20) and (key > 10)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -3805,46 +5051,43 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (((key > 10) and (key < 20)) or (key < 10)) (type: boolean) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Forward - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (KEY._col0 < 10) (type: boolean) - Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Filter Operator - predicate: ((KEY._col0 < 20) and (KEY._col0 > 10)) (type: boolean) - Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-11 Conditional Operator @@ 
-3871,6 +5114,10 @@ STAGE PLANS: Stage: Stage-5 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 Stage: Stage-1 Move Operator @@ -3885,6 +5132,10 @@ STAGE PLANS: Stage: Stage-12 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 Stage: Stage-7 Map Reduce @@ -3916,44 +5167,69 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-17 - Conditional Operator - - Stage: Stage-14 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-13 Map Reduce Map Operator Tree: TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-15 + Stage: Stage-14 Map Reduce Map Operator Tree: TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 - - Stage: Stage-16 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Forward + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (KEY._col0 < 10) (type: boolean) + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Filter Operator + predicate: ((KEY._col0 < 20) and (KEY._col0 > 10)) (type: boolean) + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: complete + outputColumnNames: _col0, 
_col1 + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Move Operator @@ -3961,7 +5237,7 @@ STAGE PLANS: hdfs directory: false #### A masked pattern was here #### - Stage: Stage-18 + Stage: Stage-15 Map Reduce Map Operator Tree: TableScan @@ -4065,11 +5341,11 @@ STAGE DEPENDENCIES: Stage-4 is a root stage Stage-11 depends on stages: Stage-4 , consists of Stage-8, Stage-7, Stage-9 Stage-8 - Stage-6 depends on stages: Stage-8, Stage-7, Stage-10, Stage-14, Stage-13, Stage-16, Stage-4, Stage-18 + Stage-6 depends on stages: Stage-8, Stage-7, Stage-10, Stage-14, Stage-13, Stage-16, Stage-19, Stage-20 Stage-0 depends on stages: Stage-6 - Stage-5 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-0, Stage-18, Stage-2, Stage-3 Stage-1 depends on stages: Stage-6 - Stage-12 depends on stages: Stage-1 + Stage-12 depends on stages: Stage-1, Stage-18, Stage-2, Stage-3 Stage-7 Stage-9 Stage-10 depends on stages: Stage-9 @@ -4078,9 +5354,11 @@ STAGE DEPENDENCIES: Stage-13 Stage-15 Stage-16 depends on stages: Stage-15 - Stage-2 depends on stages: Stage-4 Stage-18 depends on stages: Stage-4 - Stage-3 depends on stages: Stage-18 + Stage-19 depends on stages: Stage-4 + Stage-2 depends on stages: Stage-19 + Stage-20 depends on stages: Stage-19 + Stage-3 depends on stages: Stage-20 STAGE PLANS: Stage: Stage-4 @@ -4104,6 +5382,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key < 20) and (key > 10)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -4119,46 +5410,43 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (((key > 10) and (key < 20)) or (key < 10)) (type: boolean) Statistics: Num rows: 221 Data 
size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Forward - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (KEY._col0 < 10) (type: boolean) - Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Filter Operator - predicate: ((KEY._col0 < 20) and (KEY._col0 > 10)) (type: boolean) - Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-11 Conditional Operator @@ -4185,6 +5473,10 @@ STAGE PLANS: Stage: Stage-5 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 Stage: Stage-1 Move Operator @@ -4199,6 +5491,10 @@ STAGE PLANS: Stage: Stage-12 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 Stage: Stage-7 Map Reduce @@ -4269,13 +5565,77 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### + Stage: Stage-18 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-19 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Forward + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (KEY._col0 < 10) (type: boolean) + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Filter Operator + predicate: ((KEY._col0 < 20) and (KEY._col0 > 10)) (type: boolean) + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-2 Move Operator files: hdfs directory: false #### A masked pattern was here #### - Stage: Stage-18 + Stage: Stage-20 Map Reduce Map Operator Tree: TableScan diff --git ql/src/test/results/clientpositive/multi_insert_union_src.q.out ql/src/test/results/clientpositive/multi_insert_union_src.q.out index 774c5cc133..55ed49d194 100644 --- ql/src/test/results/clientpositive/multi_insert_union_src.q.out +++ ql/src/test/results/clientpositive/multi_insert_union_src.q.out @@ -131,6 +131,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 58 Data size: 608 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe 
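
The hunks above show the two shapes the injected column-statistics aggregation can take. In most plans the Group By over compute_stats(col, 'hll') runs in 'hash' mode on the map side ('hll' selects the HyperLogLog NDV estimator), emits partial sketches through a Reduce Output Operator with an empty sort order, and a dedicated follow-up MapReduce stage merges them in 'mergepartial' mode. When the insert already terminates in a single reduce stage, as in the multi_insert_union_src.q.out hunk directly above, the planner appears to fold both phases into one Group By in 'complete' mode, so no extra stage is added. A minimal sketch that should reproduce the two-stage shape, assuming the standard src fixture these tests use (the table name stats_demo is hypothetical):

SET hive.stats.column.autogather=true;
CREATE TABLE stats_demo (key STRING, value STRING);  -- hypothetical scratch table

-- EXPLAIN should show the pattern discussed above: a map-side Group By with
-- compute_stats(key, 'hll') / compute_stats(value, 'hll') in 'hash' mode,
-- then a follow-up stage that merges the partial sketches in 'mergepartial'
-- mode before a Stats Work stage persists the column statistics.
EXPLAIN
INSERT OVERWRITE TABLE stats_demo
SELECT key, value FROM src;
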
Stage: Stage-0 Move Operator @@ -145,6 +165,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 Stage: Stage-4 Map Reduce @@ -168,6 +192,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 58 Data size: 608 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-1 Move Operator @@ -182,6 +226,10 @@ STAGE PLANS: Stage: Stage-5 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 PREHOOK: query: from (select * from src1 where key < 10 union all select * from src2 where key > 100) s insert overwrite table src_multi1 select key, value where key < 150 order by key diff --git ql/src/test/results/clientpositive/multi_insert_with_join2.q.out ql/src/test/results/clientpositive/multi_insert_with_join2.q.out index a554215123..8db13c0612 100644 --- ql/src/test/results/clientpositive/multi_insert_with_join2.q.out +++ ql/src/test/results/clientpositive/multi_insert_with_join2.q.out @@ -66,7 +66,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -118,6 +119,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.join_result_1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: ida, vala, idb, valb + Statistics: Num rows: 1 Data size: 25 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(ida, 'hll'), compute_stats(vala, 'hll'), compute_stats(idb, 'hll'), compute_stats(valb, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -132,6 +148,32 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: ida, vala, idb, valb + Column Types: string, string, string, string + Table: 
default.join_result_1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain @@ -149,7 +191,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -201,6 +244,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.join_result_3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: ida, vala, idb, valb + Statistics: Num rows: 1 Data size: 25 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(ida, 'hll'), compute_stats(vala, 'hll'), compute_stats(idb, 'hll'), compute_stats(valb, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -215,6 +273,32 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: ida, vala, idb, valb + Column Types: string, string, string, string + Table: default.join_result_3 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe 
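
Throughout multi_insert_with_join2.q.out, each INSERT target now gets its own statistics pipeline: a Select Operator renaming the written expressions to the destination column names (ida, vala, idb, valb), a hash-mode compute_stats Group By, a dedicated merge stage, and a Stats Work stage whose Column Stats Desc records the columns, their types, and the target table. That is also why the stage graph is renumbered: the former bare "Stats Work / Basic Stats Work:" stages (old Stage-3 and Stage-4) are replaced by per-table stats stages plus the merge stages (Stage-4 and Stage-6) they depend on. The net effect is roughly what gathering statistics by hand used to provide; a sketch follows, with the SELECT bodies reconstructed from the filter predicates in the plans and therefore only illustrative:

-- Before this change (hive.stats.column.autogather=false), column statistics
-- for the targets of a multi-insert had to be gathered explicitly:
FROM T_A a LEFT JOIN T_B b ON a.id = b.id
INSERT OVERWRITE TABLE join_result_1
  SELECT a.id, a.val, b.id, b.val WHERE b.id = 'Id_1' AND b.val = 'val_103'
INSERT OVERWRITE TABLE join_result_3
  SELECT a.id, a.val, b.id, b.val WHERE b.id = 'Id_2' AND b.val = 'val_104';
-- (column names and filter shape are illustrative; see the PREHOOK queries
-- for the exact test statements)

ANALYZE TABLE join_result_1 COMPUTE STATISTICS FOR COLUMNS;
ANALYZE TABLE join_result_3 COMPUTE STATISTICS FOR COLUMNS;

With autogather on, the ANALYZE step becomes unnecessary for freshly written tables, at the cost of the additional compute_stats stages visible in every plan in this diff.
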
PREHOOK: query: explain FROM T_A a LEFT JOIN T_B b ON a.id = b.id @@ -237,9 +321,11 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-1, Stage-4, Stage-6 + Stage-4 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-0, Stage-4, Stage-6 Stage-0 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -287,6 +373,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.join_result_3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 'Id_2' (type: string), 'val_104' (type: string) + outputColumnNames: ida, vala, idb, valb + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(ida, 'hll'), compute_stats(vala, 'hll'), compute_stats(idb, 'hll'), compute_stats(valb, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((_col5 = 'Id_1') and (_col6 = 'val_103')) (type: boolean) Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE @@ -302,6 +403,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.join_result_1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 'Id_1' (type: string), 'val_103' (type: string) + outputColumnNames: ida, vala, idb, valb + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(ida, 'hll'), compute_stats(vala, 'hll'), compute_stats(idb, 'hll'), compute_stats(valb, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -316,6 +432,40 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: ida, vala, idb, valb + Column Types: string, string, string, string + Table: default.join_result_3 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + 
Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: ida, vala, idb, valb + Column Types: string, string, string, string + Table: default.join_result_1 Stage: Stage-0 Move Operator @@ -327,9 +477,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.join_result_1 - Stage: Stage-4 - Stats Work - Basic Stats Work: + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: explain FROM T_A a LEFT JOIN T_B b ON a.id = b.id @@ -352,9 +520,11 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4, Stage-6 + Stage-4 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-1, Stage-4, Stage-6 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -402,6 +572,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.join_result_1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 'Id_1' (type: string), 'val_103' (type: string) + outputColumnNames: ida, vala, idb, valb + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(ida, 'hll'), compute_stats(vala, 'hll'), compute_stats(idb, 'hll'), compute_stats(valb, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((_col5 = 'Id_2') and (_col6 = 'val_104')) (type: boolean) Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE @@ -417,6 +602,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.join_result_3 + Select Operator + expressions: _col0 (type: string), _col1 (type: 
string), 'Id_2' (type: string), 'val_104' (type: string) + outputColumnNames: ida, vala, idb, valb + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(ida, 'hll'), compute_stats(vala, 'hll'), compute_stats(idb, 'hll'), compute_stats(valb, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -431,6 +631,40 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: ida, vala, idb, valb + Column Types: string, string, string, string + Table: default.join_result_1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: ida, vala, idb, valb + Column Types: string, string, string, string + Table: default.join_result_3 Stage: Stage-1 Move Operator @@ -442,9 +676,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.join_result_3 - Stage: Stage-4 - Stats Work - Basic Stats Work: + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: explain FROM T_A a JOIN T_B b ON a.id = b.id @@ -467,9 +719,11 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4, Stage-6 + Stage-4 depends on stages: 
Stage-2 + Stage-5 depends on stages: Stage-1, Stage-4, Stage-6 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -523,6 +777,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.join_result_1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 'Id_1' (type: string), 'val_103' (type: string) + outputColumnNames: ida, vala, idb, valb + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(ida, 'hll'), compute_stats(vala, 'hll'), compute_stats(idb, 'hll'), compute_stats(valb, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((_col5 = 'Id_2') and (_col6 = 'val_104')) (type: boolean) Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE @@ -538,6 +807,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.join_result_3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 'Id_2' (type: string), 'val_104' (type: string) + outputColumnNames: ida, vala, idb, valb + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(ida, 'hll'), compute_stats(vala, 'hll'), compute_stats(idb, 'hll'), compute_stats(valb, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -552,6 +836,40 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: ida, vala, idb, valb + Column Types: string, string, string, string + Table: default.join_result_1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + 
Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: ida, vala, idb, valb + Column Types: string, string, string, string + Table: default.join_result_3 Stage: Stage-1 Move Operator @@ -563,9 +881,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.join_result_3 - Stage: Stage-4 - Stats Work - Basic Stats Work: + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: explain FROM T_A a JOIN T_B b ON a.id = b.id @@ -588,9 +924,11 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4, Stage-6 + Stage-4 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-1, Stage-4, Stage-6 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -644,6 +982,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.join_result_1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 'Id_1' (type: string), 'val_103' (type: string) + outputColumnNames: ida, vala, idb, valb + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(ida, 'hll'), compute_stats(vala, 'hll'), compute_stats(idb, 'hll'), compute_stats(valb, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((_col5 = 'Id_2') and (_col6 = 'val_104')) (type: boolean) Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE @@ -659,6 +1012,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.join_result_3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 'Id_2' (type: string), 'val_104' (type: string) + outputColumnNames: ida, vala, idb, valb + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(ida, 'hll'), compute_stats(vala, 'hll'), compute_stats(idb, 'hll'), compute_stats(valb, 
'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -673,6 +1041,40 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: ida, vala, idb, valb + Column Types: string, string, string, string + Table: default.join_result_1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: ida, vala, idb, valb + Column Types: string, string, string, string + Table: default.join_result_3 Stage: Stage-1 Move Operator @@ -684,9 +1086,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.join_result_3 - Stage: Stage-4 - Stats Work - Basic Stats Work: + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: explain FROM T_A a JOIN T_B b ON a.id = b.id @@ -709,9 +1129,11 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4, Stage-6 + Stage-4 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-1, Stage-4, Stage-6 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -777,6 +1199,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat 
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.join_result_1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: ida, vala, idb, valb + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(ida, 'hll'), compute_stats(vala, 'hll'), compute_stats(idb, 'hll'), compute_stats(valb, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((_col2 = 'val_104') and (_col3 = 'Id_2')) (type: boolean) Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE @@ -792,6 +1229,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.join_result_3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: ida, vala, idb, valb + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(ida, 'hll'), compute_stats(vala, 'hll'), compute_stats(idb, 'hll'), compute_stats(valb, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -806,6 +1258,40 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: ida, vala, idb, valb + Column Types: string, string, string, string + Table: default.join_result_1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: ida, vala, idb, valb + Column Types: string, string, string, string + Table: default.join_result_3 Stage: Stage-1 Move Operator @@ -817,9 +1303,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.join_result_3 - 
Stage: Stage-4 - Stats Work - Basic Stats Work: + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: explain FROM T_A a JOIN T_B b ON a.id = b.id @@ -842,9 +1346,11 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4, Stage-6 + Stage-4 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-1, Stage-4, Stage-6 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -910,6 +1416,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.join_result_1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: ida, vala, idb, valb + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(ida, 'hll'), compute_stats(vala, 'hll'), compute_stats(idb, 'hll'), compute_stats(valb, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((_col2 = 'val_104') and (_col3 = 'Id_2')) (type: boolean) Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE @@ -925,6 +1446,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.join_result_3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: ida, vala, idb, valb + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(ida, 'hll'), compute_stats(vala, 'hll'), compute_stats(idb, 'hll'), compute_stats(valb, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -939,6 +1475,40 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: ida, vala, idb, valb + Column Types: string, string, string, string + Table: default.join_result_1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: ida, vala, idb, valb + Column Types: string, string, string, string + Table: default.join_result_3 Stage: Stage-1 Move Operator @@ -950,7 +1520,25 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.join_result_3 - Stage: Stage-4 - Stats Work - Basic Stats Work: + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe diff --git ql/src/test/results/clientpositive/multigroupby_singlemr.q.out ql/src/test/results/clientpositive/multigroupby_singlemr.q.out index cd030e55ab..facf00e2f8 100644 --- ql/src/test/results/clientpositive/multigroupby_singlemr.q.out +++ ql/src/test/results/clientpositive/multigroupby_singlemr.q.out @@ -51,10 +51,12 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1, Stage-4, Stage-7 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -114,6 +116,21 @@ STAGE PLANS: output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: d1, d2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(d1, 'hll'), compute_stats(d2, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -128,12 +145,46 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: d1, d2 + Column Types: int, int + Table: default.dest1 Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: d1, d2, d3 + Column Types: int, int, int + Table: default.dest2 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) @@ -158,6 +209,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: d1, d2, d3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(d1, 'hll'), compute_stats(d2, 'hll'), compute_stats(d3, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1272 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -169,9 +235,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-5 - Stats Work - Basic Stats Work: + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1272 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: 
+ Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: EXPLAIN FROM TBL @@ -186,10 +270,12 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1, Stage-4, Stage-7 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -249,6 +335,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: d1, d2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(d1, 'hll'), compute_stats(d2, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -263,12 +364,46 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: d1, d2 + Column Types: int, int + Table: default.dest1 Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: d1, d2, d3 + Column Types: int, int, int + Table: default.dest2 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) @@ -293,6 +428,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: 
default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: d1, d2, d3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(d1, 'hll'), compute_stats(d2, 'hll'), compute_stats(d3, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1272 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -304,9 +454,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-5 - Stats Work - Basic Stats Work: + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1272 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: EXPLAIN FROM TBL @@ -321,10 +489,12 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1, Stage-4, Stage-7 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -384,6 +554,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest3 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int) + outputColumnNames: d1, d2, d3, d4 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(d1, 'hll'), compute_stats(d2, 'hll'), compute_stats(d3, 'hll'), compute_stats(d4, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1696 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -398,12 +583,46 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: d1, d2, d3, d4 + Column Types: int, int, int, int + Table: default.dest3 Stage: Stage-4 Map Reduce Map Operator 
Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1696 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: d1, d2, d3 + Column Types: int, int, int + Table: default.dest2 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) @@ -428,6 +647,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: d1, d2, d3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(d1, 'hll'), compute_stats(d2, 'hll'), compute_stats(d3, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1272 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -439,9 +673,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-5 - Stats Work - Basic Stats Work: + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1272 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: EXPLAIN FROM TBL @@ -456,9 +708,11 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4, Stage-6 + Stage-4 depends on stages: Stage-2 + Stage-5 depends on 
stages: Stage-1, Stage-4, Stage-6 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -498,6 +752,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest3 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int) + outputColumnNames: d1, d2, d3, d4 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(d1, 'hll'), compute_stats(d2, 'hll'), compute_stats(d3, 'hll'), compute_stats(d4, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1696 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: int), KEY._col2 (type: int), KEY._col1 (type: int) @@ -516,6 +785,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest4 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int) + outputColumnNames: d1, d2, d3, d4 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(d1, 'hll'), compute_stats(d2, 'hll'), compute_stats(d3, 'hll'), compute_stats(d4, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1696 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -530,6 +814,40 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: d1, d2, d3, d4 + Column Types: int, int, int, int + Table: default.dest3 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1696 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: d1, d2, d3, d4 + Column Types: int, int, int, int + Table: default.dest4 Stage: Stage-1 Move Operator @@ -541,9 
+859,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest4 - Stage: Stage-4 - Stats Work - Basic Stats Work: + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1696 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: EXPLAIN FROM TBL @@ -560,13 +896,16 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-3 is a root stage Stage-0 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-0, Stage-5, Stage-8, Stage-11 Stage-5 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-5 - Stage-6 depends on stages: Stage-1 - Stage-7 depends on stages: Stage-3 - Stage-2 depends on stages: Stage-7 - Stage-8 depends on stages: Stage-2 + Stage-7 depends on stages: Stage-1, Stage-5, Stage-8, Stage-11 + Stage-10 depends on stages: Stage-2, Stage-5, Stage-8, Stage-11 + Stage-6 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-6 + Stage-8 depends on stages: Stage-6 + Stage-9 depends on stages: Stage-3 + Stage-2 depends on stages: Stage-9 + Stage-11 depends on stages: Stage-9 STAGE PLANS: Stage: Stage-3 @@ -642,6 +981,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest3 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int) + outputColumnNames: d1, d2, d3, d4 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(d1, 'hll'), compute_stats(d2, 'hll'), compute_stats(d3, 'hll'), compute_stats(d4, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1696 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -656,12 +1010,54 @@ STAGE PLANS: Stage: Stage-4 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: d1, d2, d3, d4 + Column Types: int, int, int, int + Table: default.dest3 Stage: Stage-5 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1696 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), 
compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-7 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: d1, d2, d3 + Column Types: int, int, int + Table: default.dest2 + + Stage: Stage-10 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: d1, d2 + Column Types: int, int + Table: default.dest1 + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) @@ -686,6 +1082,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: d1, d2, d3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(d1, 'hll'), compute_stats(d2, 'hll'), compute_stats(d3, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1272 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -697,11 +1108,29 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-6 - Stats Work - Basic Stats Work: + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1272 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-7 + Stage: Stage-9 Map Reduce Map Operator Tree: TableScan @@ -730,6 +1159,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: d1, d2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(d1, 'hll'), compute_stats(d2, 'hll') + mode: hash + 
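
The renumbered stage DAGs above all follow the same pattern: with hive.stats.column.autogather enabled, each INSERT target grows a Select Operator feeding compute_stats(col, 'hll') partial aggregates (mode: hash), a follow-up map-reduce that merges them (mode: mergepartial), and a Column Stats Desc entry under that table's Stats Work stage. A minimal sketch that should reproduce the multi-target shape — the table and column names here are illustrative, not taken from the tests:

    -- hedged sketch: one column-stats branch per INSERT target
    SET hive.stats.column.autogather=true;
    CREATE TABLE stats_src  (d1 INT, d2 INT, d3 INT);
    CREATE TABLE stats_dst2 (d1 INT, d2 INT, d3 INT);
    CREATE TABLE stats_dst1 (d1 INT, d2 INT);
    EXPLAIN
    FROM stats_src
    INSERT OVERWRITE TABLE stats_dst2 SELECT d1, d2, d3
    INSERT OVERWRITE TABLE stats_dst1 SELECT d1, d2;
    -- expected: per target, a compute_stats(..., 'hll') Group By in the
    -- insert stage, an extra map-reduce with mode: mergepartial, and a
    -- Stats Work stage carrying a Column Stats Desc for that table
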
outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Move Operator @@ -741,7 +1185,25 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 - Stage: Stage-8 - Stats Work - Basic Stats Work: + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe diff --git ql/src/test/results/clientpositive/nonreserved_keywords_insert_into1.q.out ql/src/test/results/clientpositive/nonreserved_keywords_insert_into1.q.out index d6a8da5b15..490853c7df 100644 --- ql/src/test/results/clientpositive/nonreserved_keywords_insert_into1.q.out +++ ql/src/test/results/clientpositive/nonreserved_keywords_insert_into1.q.out @@ -17,7 +17,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -36,7 +37,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Select Operator @@ -58,6 +58,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, as + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(as, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -72,6 +87,32 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, as + Column Types: int, string + Table: default.insert + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 
(type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT INTO TABLE `insert` SELECT * FROM src LIMIT 100 PREHOOK: type: QUERY @@ -103,7 +144,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -122,7 +164,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Select Operator @@ -144,6 +185,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, as + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(as, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -158,6 +214,32 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, as + Column Types: int, string + Table: default.insert + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT INTO TABLE `insert` SELECT * FROM src LIMIT 100 PREHOOK: type: QUERY @@ -198,7 +280,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -217,7 +300,6 @@ STAGE PLANS: Reduce 
Output Operator sort order: Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Select Operator @@ -239,6 +321,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, as + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(as, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -253,6 +350,32 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, as + Column Types: int, string + Table: default.insert + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE `insert` SELECT * FROM src LIMIT 10 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/notable_alias1.q.out ql/src/test/results/clientpositive/notable_alias1.q.out index ff1778b66b..03f2649fc4 100644 --- ql/src/test/results/clientpositive/notable_alias1.q.out +++ ql/src/test/results/clientpositive/notable_alias1.q.out @@ -17,7 +17,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -60,6 +61,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: double) + outputColumnNames: dummy, key, value + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(dummy, 'hll'), compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -74,6 +90,32 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: dummy, key, value + Column Types: string, int, double + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT '1234', key, count(1) WHERE src.key < 100 group by key diff --git ql/src/test/results/clientpositive/notable_alias2.q.out ql/src/test/results/clientpositive/notable_alias2.q.out index 2f8b0e9b27..7186513b89 100644 --- ql/src/test/results/clientpositive/notable_alias2.q.out +++ ql/src/test/results/clientpositive/notable_alias2.q.out @@ -17,7 +17,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -60,6 +61,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: double) + outputColumnNames: dummy, key, value + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(dummy, 'hll'), compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -74,6 +90,32 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: dummy, key, value + Column Types: string, int, double + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic 
stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT '1234', src.key, count(1) WHERE key < 100 group by src.key diff --git ql/src/test/results/clientpositive/orc_merge1.q.out ql/src/test/results/clientpositive/orc_merge1.q.out index 936e93fc06..cad6c00a8e 100644 --- ql/src/test/results/clientpositive/orc_merge1.q.out +++ ql/src/test/results/clientpositive/orc_merge1.q.out @@ -74,6 +74,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -91,6 +125,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1 PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part) SELECT key, value, PMOD(HASH(key), 2) as part @@ -150,6 +188,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1b + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), 
compute_stats(value, 'hll') + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -176,6 +248,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1b Stage: Stage-3 Map Reduce @@ -265,6 +341,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1c + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -291,6 +401,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1c Stage: Stage-3 Merge File Operator diff --git ql/src/test/results/clientpositive/orc_merge10.q.out ql/src/test/results/clientpositive/orc_merge10.q.out index 94956e8565..0b8e2ddfba 100644 --- ql/src/test/results/clientpositive/orc_merge10.q.out +++ ql/src/test/results/clientpositive/orc_merge10.q.out @@ -74,6 +74,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -91,6 +125,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1 PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part) SELECT key, value, PMOD(HASH(key), 2) as part @@ -150,6 +188,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1b + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, 
_col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -176,6 +248,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1b Stage: Stage-3 Map Reduce @@ -265,6 +341,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1c + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe 
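
For partitioned targets the gather is keyed rather than global: compute_stats aggregates with keys: ds, part, the shuffle partitions on those key columns, and the merged structs are reordered next to the keys so that each dynamic partition receives its own column statistics. A sketch of a query that should produce this keyed variant, assuming hypothetical table names:

    -- hedged sketch: per-partition column stats for a dynamic-partition insert
    SET hive.stats.column.autogather=true;
    SET hive.exec.dynamic.partition.mode=nonstrict;
    CREATE TABLE part_src (key INT, value STRING);
    CREATE TABLE part_dst (key INT, value STRING)
        PARTITIONED BY (ds STRING, part STRING)
        STORED AS ORC;
    EXPLAIN
    INSERT OVERWRITE TABLE part_dst PARTITION (ds='1', part)
    SELECT key, value, CAST(PMOD(HASH(key), 2) AS STRING) AS part
    FROM part_src;
    -- expected: compute_stats(key, 'hll'), compute_stats(value, 'hll')
    -- grouped by ds and part, with the reduce side keyed the same way
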
Stage: Stage-7 Conditional Operator @@ -291,6 +401,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1c Stage: Stage-3 Merge File Operator diff --git ql/src/test/results/clientpositive/orc_merge2.q.out ql/src/test/results/clientpositive/orc_merge2.q.out index 5e8a1f0ccb..70788fde9f 100644 --- ql/src/test/results/clientpositive/orc_merge2.q.out +++ ql/src/test/results/clientpositive/orc_merge2.q.out @@ -53,6 +53,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge2a + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string), UDFToString(_col3) (type: string) + outputColumnNames: key, value, one, two, three + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: one (type: string), two (type: string), three (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: struct), _col4 (type: struct), _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -80,6 +114,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge2a Stage: Stage-3 Merge File Operator diff --git ql/src/test/results/clientpositive/orc_merge3.q.out ql/src/test/results/clientpositive/orc_merge3.q.out index e527f11795..597b00078e 100644 --- ql/src/test/results/clientpositive/orc_merge3.q.out +++ ql/src/test/results/clientpositive/orc_merge3.q.out @@ -83,6 +83,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge3b + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By 
Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -106,6 +132,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge3b Stage: Stage-3 Merge File Operator diff --git ql/src/test/results/clientpositive/orc_merge4.q.out ql/src/test/results/clientpositive/orc_merge4.q.out index c063fc345e..49147b5dc6 100644 --- ql/src/test/results/clientpositive/orc_merge4.q.out +++ ql/src/test/results/clientpositive/orc_merge4.q.out @@ -101,6 +101,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.orcfile_merge3b + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1000 Data size: 94000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -124,6 +150,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge3b Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/orc_merge5.q.out ql/src/test/results/clientpositive/orc_merge5.q.out index 069b857998..d055fe7aa5 100644 --- ql/src/test/results/clientpositive/orc_merge5.q.out +++ ql/src/test/results/clientpositive/orc_merge5.q.out @@ -54,6 +54,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5b + 
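
Because the stats branch runs as part of the insert itself, no separate ANALYZE pass should be needed to populate column statistics; they can be checked from the metastore right after the write. A small sketch, again with hypothetical names:

    -- hedged sketch: inspect autogathered column stats after an insert
    SET hive.stats.column.autogather=true;
    CREATE TABLE stats_check (userid BIGINT, string1 STRING);
    INSERT INTO stats_check VALUES (13, 'foo'), (42, 'bar');
    -- the insert above already wrote column stats; inspect one column:
    DESCRIBE FORMATTED stats_check userid;
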
Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp) + outputColumnNames: userid, string1, subtype, decimal1, ts + Statistics: Num rows: 1 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 'hll'), compute_stats(string1, 'hll'), compute_stats(subtype, 'hll'), compute_stats(decimal1, 'hll'), compute_stats(ts, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2344 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2344 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2408 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2408 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -68,6 +94,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5b PREHOOK: query: insert overwrite table orc_merge5b select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13 PREHOOK: type: QUERY @@ -140,6 +170,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5b + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp) + outputColumnNames: userid, string1, subtype, decimal1, ts + Statistics: Num rows: 1 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 'hll'), compute_stats(string1, 'hll'), compute_stats(subtype, 'hll'), compute_stats(decimal1, 'hll'), compute_stats(ts, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2344 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2344 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2408 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2408 Basic stats: COMPLETE Column stats: NONE + table: + input 
format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -163,6 +219,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5b Stage: Stage-3 Merge File Operator diff --git ql/src/test/results/clientpositive/orc_merge6.q.out ql/src/test/results/clientpositive/orc_merge6.q.out index 8d11589e8d..8ab38832b0 100644 --- ql/src/test/results/clientpositive/orc_merge6.q.out +++ ql/src/test/results/clientpositive/orc_merge6.q.out @@ -54,6 +54,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5a + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp), '2000' (type: string), UDFToInteger('24') (type: int) + outputColumnNames: userid, string1, subtype, decimal1, ts, year, hour + Statistics: Num rows: 1 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 'hll'), compute_stats(string1, 'hll'), compute_stats(subtype, 'hll'), compute_stats(decimal1, 'hll'), compute_stats(ts, 'hll') + keys: year (type: string), hour (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 1 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col0 (type: string), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -71,6 +105,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5a PREHOOK: query: insert overwrite table orc_merge5a partition (year="2000",hour=24) select 
userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13 PREHOOK: type: QUERY @@ -185,6 +223,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5a + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp), '2000' (type: string), UDFToInteger('24') (type: int) + outputColumnNames: userid, string1, subtype, decimal1, ts, year, hour + Statistics: Num rows: 1 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 'hll'), compute_stats(string1, 'hll'), compute_stats(subtype, 'hll'), compute_stats(decimal1, 'hll'), compute_stats(ts, 'hll') + keys: year (type: string), hour (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 1 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col0 (type: string), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -211,6 +283,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5a Stage: Stage-3 Merge File Operator diff --git ql/src/test/results/clientpositive/orc_merge_diff_fs.q.out ql/src/test/results/clientpositive/orc_merge_diff_fs.q.out index 936e93fc06..cad6c00a8e 100644 --- ql/src/test/results/clientpositive/orc_merge_diff_fs.q.out +++ ql/src/test/results/clientpositive/orc_merge_diff_fs.q.out @@ -74,6 +74,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + 
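
The property can be overridden per session, so workloads that cannot afford the extra map-reduce per insert can opt out and fall back to explicit collection. A sketch of the manual path, with hypothetical names:

    -- hedged sketch: disable autogather and collect column stats manually
    SET hive.stats.column.autogather=false;
    CREATE TABLE manual_stats (userid BIGINT, subtype DOUBLE);
    INSERT INTO manual_stats VALUES (1, 0.5), (2, 1.5);
    -- with autogather off, the insert writes only basic stats;
    -- column stats then require an explicit ANALYZE:
    ANALYZE TABLE manual_stats COMPUTE STATISTICS FOR COLUMNS;
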
Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -91,6 +125,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1 PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part) SELECT key, value, PMOD(HASH(key), 2) as part @@ -150,6 +188,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1b + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 
250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -176,6 +248,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1b Stage: Stage-3 Map Reduce @@ -265,6 +341,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1c + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -291,6 +401,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1c Stage: Stage-3 Merge File Operator diff --git ql/src/test/results/clientpositive/orc_merge_incompat1.q.out ql/src/test/results/clientpositive/orc_merge_incompat1.q.out index 01c9df85a1..d7a607bd42 100644 --- ql/src/test/results/clientpositive/orc_merge_incompat1.q.out +++ ql/src/test/results/clientpositive/orc_merge_incompat1.q.out @@ -53,6 +53,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5b + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp) + outputColumnNames: userid, string1, subtype, decimal1, ts + Statistics: Num rows: 1 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 
'hll'), compute_stats(string1, 'hll'), compute_stats(subtype, 'hll'), compute_stats(decimal1, 'hll'), compute_stats(ts, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2344 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2344 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2408 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2408 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -67,6 +93,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5b PREHOOK: query: insert overwrite table orc_merge5b select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/orc_merge_incompat2.q.out ql/src/test/results/clientpositive/orc_merge_incompat2.q.out index 86d5d3c1a2..27e0d7ddbe 100644 --- ql/src/test/results/clientpositive/orc_merge_incompat2.q.out +++ ql/src/test/results/clientpositive/orc_merge_incompat2.q.out @@ -50,6 +50,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5a + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp), _col5 (type: double) + outputColumnNames: userid, string1, subtype, decimal1, ts, st + Statistics: Num rows: 1 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 'hll'), compute_stats(string1, 'hll'), compute_stats(subtype, 'hll'), compute_stats(decimal1, 'hll'), compute_stats(ts, 'hll') + keys: st (type: double) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 1 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + keys: KEY._col0 (type: double) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + 
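
Note: the plan hunks above, and the analogous ones throughout this patch, all follow from the HiveConf default flip of hive.stats.column.autogather to true: every INSERT plan gains a Select Operator feeding compute_stats(col, 'hll') partials through a hash-mode Group By into a mergepartial reducer, and the Stats Work stage gains a Column Stats Desc block. A minimal sketch of how to reproduce this plan shape in a Hive session (the stats_demo_* table names are illustrative, not part of the patch):

    SET hive.stats.column.autogather=true;
    CREATE TABLE stats_demo_src (key INT, value STRING);
    CREATE TABLE stats_demo_dst (key INT, value STRING);
    -- The EXPLAIN output should now contain the extra
    -- compute_stats(key, 'hll'), compute_stats(value, 'hll') branch
    -- and a "Column Stats Desc:" section under Stats Work, as in the hunks above.
    EXPLAIN
    INSERT OVERWRITE TABLE stats_demo_dst
    SELECT key, value FROM stats_demo_src;
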
Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -66,6 +100,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5a PREHOOK: query: insert overwrite table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5 order by userid PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/parallel.q.out ql/src/test/results/clientpositive/parallel.q.out index 31b1a0dd3d..12ad5ee874 100644 --- ql/src/test/results/clientpositive/parallel.q.out +++ ql/src/test/results/clientpositive/parallel.q.out @@ -28,9 +28,11 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-0, Stage-5, Stage-7 + Stage-5 depends on stages: Stage-3 + Stage-6 depends on stages: Stage-1, Stage-5, Stage-7 Stage-1 depends on stages: Stage-3 - Stage-5 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-2 @@ -91,6 +93,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_a + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete @@ -104,6 +121,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_b + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -118,6 +150,40 @@ STAGE PLANS: Stage: Stage-4 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_a + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_b Stage: Stage-1 Move Operator @@ -129,9 +195,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_b - Stage: Stage-5 - Stats Work - Basic Stats Work: + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: from (select key, value from src group by key, value) s insert overwrite table src_a select s.key, s.value group by s.key, s.value diff --git ql/src/test/results/clientpositive/parallel_join1.q.out ql/src/test/results/clientpositive/parallel_join1.q.out index 0450e1586d..3aa369c0ac 100644 --- ql/src/test/results/clientpositive/parallel_join1.q.out +++ ql/src/test/results/clientpositive/parallel_join1.q.out @@ -17,7 +17,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -75,6 +76,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: 
NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -89,6 +105,32 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest_j1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value diff --git ql/src/test/results/clientpositive/parquet_analyze.q.out ql/src/test/results/clientpositive/parquet_analyze.q.out index 99199b0e5b..d07a093950 100644 --- ql/src/test/results/clientpositive/parquet_analyze.q.out +++ ql/src/test/results/clientpositive/parquet_analyze.q.out @@ -89,7 +89,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"address\":\"true\",\"first_name\":\"true\",\"id\":\"true\",\"last_name\":\"true\",\"salary\":\"true\",\"start_date\":\"true\",\"state\":\"true\"}} numFiles 1 numRows 100 rawDataSize 700 @@ -136,7 +136,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"address\":\"true\",\"first_name\":\"true\",\"id\":\"true\",\"last_name\":\"true\",\"salary\":\"true\",\"start_date\":\"true\",\"state\":\"true\"}} numFiles 1 numRows 100 rawDataSize 5952 diff --git ql/src/test/results/clientpositive/pcr.q.out ql/src/test/results/clientpositive/pcr.q.out index 3f69f8bf19..d5d900f5f4 100644 --- ql/src/test/results/clientpositive/pcr.q.out +++ ql/src/test/results/clientpositive/pcr.q.out @@ -91,7 +91,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -138,7 +138,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -280,7 +280,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} 
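
Note: the COLUMN_STATS_ACCURATE rewrites in these hunks show the second visible effect of the default flip: the table/partition parameter now records a per-column accuracy map alongside BASIC_STATS. A hedged way to observe this after an autogathered INSERT (table name illustrative, continuing the sketch above):

    DESCRIBE FORMATTED stats_demo_dst;
    -- Expected among Table Parameters:
    --   COLUMN_STATS_ACCURATE  {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
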
bucket_count -1 column.name.delimiter , columns key,value @@ -327,7 +327,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -374,7 +374,7 @@ STAGE PLANS: partition values: ds 2000-04-10 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -554,7 +554,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -601,7 +601,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -745,7 +745,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -792,7 +792,7 @@ STAGE PLANS: partition values: ds 2000-04-10 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -938,7 +938,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -985,7 +985,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1032,7 +1032,7 @@ STAGE PLANS: partition values: ds 2000-04-10 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1189,7 +1189,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1236,7 +1236,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1283,7 +1283,7 @@ STAGE PLANS: partition values: ds 2000-04-10 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1447,7 +1447,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1494,7 +1494,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1621,7 +1621,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1668,7 +1668,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1835,7 +1835,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1882,7 +1882,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1929,7 +1929,7 @@ STAGE PLANS: partition values: ds 2000-04-10 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2123,7 +2123,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2170,7 +2170,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2326,7 +2326,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2589,7 +2589,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2636,7 +2636,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2888,7 +2888,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2935,7 +2935,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2982,7 +2982,7 @@ STAGE PLANS: partition values: ds 2000-04-10 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -3029,7 +3029,7 @@ STAGE PLANS: partition values: ds 2000-04-11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -3208,7 +3208,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -3255,7 +3255,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -3302,7 +3302,7 @@ STAGE PLANS: partition values: ds 2000-04-10 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -3466,17 +3466,13 @@ STAGE DEPENDENCIES: Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-10 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 - Stage-14 depends on stages: Stage-2 , consists of Stage-11, Stage-10, Stage-12 - Stage-11 - Stage-1 depends on stages: Stage-11, Stage-10, Stage-13 - Stage-9 depends on stages: Stage-1 - Stage-10 - Stage-12 - Stage-13 depends on stages: Stage-12 + Stage-1 depends on stages: Stage-2 + Stage-9 depends on stages: Stage-1, Stage-10 + Stage-10 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -3522,6 +3518,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 @@ -3558,6 +3570,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data 
size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3569,7 +3608,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -3610,6 +3649,35 @@ STAGE PLANS: name: default.pcr_t1 Truncated Path -> Alias: /pcr_t1/ds=2000-04-08 [pcr_t1] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-8 Conditional Operator @@ -3652,6 +3720,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.pcr_t2 + Is Table Level Stats: true Stage: Stage-4 Map Reduce @@ -3833,15 +3906,6 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-14 - Conditional Operator - - Stage: Stage-11 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-1 Move Operator tables: @@ -3874,186 +3938,80 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.pcr_t3 + Is Table Level Stats: true Stage: Stage-10 Map Reduce Map Operator Tree: TableScan GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name 
default.pcr_t3 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct pcr_t3 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.pcr_t3 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: -ext-10005 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + base file name: -mr-10005 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 column.name.delimiter , - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.pcr_t3 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct pcr_t3 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 column.name.delimiter , - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.pcr_t3 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct pcr_t3 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.pcr_t3 - name: default.pcr_t3 + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Truncated Path -> Alias: #### A masked pattern was here #### - - Stage: Stage-12 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 - column.name.delimiter , 
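
Note: the pcr.q.out stage-graph changes here replace the old conditional merge stages for pcr_t3 (Stage-14/11/12/13) with a direct Move plus a dedicated stats-aggregation MapReduce stage (Stage-10) that reads the intermediate SequenceFile of partial compute_stats structs (base file name -mr-10005 rather than the old -ext-10005 merge input). A sketch of the multi-insert shape these hunks exercise, using the test's own pcr_t1/pcr_t2/pcr_t3 tables (the exact WHERE predicates vary by test case):

    EXPLAIN EXTENDED
    FROM pcr_t1
    INSERT OVERWRITE TABLE pcr_t2 SELECT key, value WHERE ds='2000-04-08'
    INSERT OVERWRITE TABLE pcr_t3 SELECT key, value WHERE ds='2000-04-08';
    -- With column autogather enabled, each INSERT branch gains its own
    -- follow-up stats-aggregation stage, which is why the stage dependency
    -- lists above now reference Stage-10.
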
- columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.pcr_t3 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct pcr_t3 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.pcr_t3 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10005 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.pcr_t3 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct pcr_t3 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.pcr_t3 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct pcr_t3 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.pcr_t3 - name: default.pcr_t3 - Truncated Path -> Alias: + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - - Stage: Stage-13 - Move Operator - files: - hdfs directory: true + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: from pcr_t1 insert overwrite table pcr_t2 select key, value where ds='2000-04-08' @@ -4090,17 +4048,13 @@ STAGE DEPENDENCIES: Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 Stage-5 Stage-0 depends on stages: 
Stage-5, Stage-4, Stage-7 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-10 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 - Stage-14 depends on stages: Stage-2 , consists of Stage-11, Stage-10, Stage-12 - Stage-11 - Stage-1 depends on stages: Stage-11, Stage-10, Stage-13 - Stage-9 depends on stages: Stage-1 - Stage-10 - Stage-12 - Stage-13 depends on stages: Stage-12 + Stage-1 depends on stages: Stage-2 + Stage-9 depends on stages: Stage-1, Stage-10 + Stage-10 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -4129,7 +4083,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -4150,6 +4104,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: 2 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Filter Operator isSamplingPred: false predicate: (key = 3) (type: boolean) @@ -4169,7 +4139,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -4190,6 +4160,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: 3 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -4201,7 +4198,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -4242,6 
+4239,35 @@ STAGE PLANS: name: default.pcr_t1 Truncated Path -> Alias: /pcr_t1/ds=2000-04-08 [pcr_t1] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-8 Conditional Operator @@ -4261,7 +4287,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -4284,6 +4310,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.pcr_t2 + Is Table Level Stats: true Stage: Stage-4 Map Reduce @@ -4299,7 +4330,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -4329,7 +4360,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -4350,7 +4381,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -4386,7 +4417,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -4416,7 +4447,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + 
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -4437,7 +4468,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -4465,15 +4496,6 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-14 - Conditional Operator - - Stage: Stage-11 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-1 Move Operator tables: @@ -4483,7 +4505,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -4506,186 +4528,80 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.pcr_t3 + Is Table Level Stats: true Stage: Stage-10 Map Reduce Map Operator Tree: TableScan GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.pcr_t3 - numFiles 1 - numRows 20 - rawDataSize 160 - serialization.ddl struct pcr_t3 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 180 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.pcr_t3 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: -ext-10005 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + base file name: -mr-10005 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 column.name.delimiter , - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.pcr_t3 - numFiles 1 - numRows 20 - rawDataSize 160 - serialization.ddl struct pcr_t3 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 180 -#### A masked pattern was here #### - serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 column.name.delimiter , - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.pcr_t3 - numFiles 1 - numRows 20 - rawDataSize 160 - serialization.ddl struct pcr_t3 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 180 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.pcr_t3 - name: default.pcr_t3 + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Truncated Path -> Alias: #### A masked pattern was here #### - - Stage: Stage-12 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.pcr_t3 - numFiles 1 - numRows 20 - rawDataSize 160 - serialization.ddl struct pcr_t3 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 180 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.pcr_t3 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10005 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.pcr_t3 - numFiles 1 - numRows 20 - rawDataSize 160 - serialization.ddl struct pcr_t3 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 180 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name 
default.pcr_t3 - numFiles 1 - numRows 20 - rawDataSize 160 - serialization.ddl struct pcr_t3 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 180 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.pcr_t3 - name: default.pcr_t3 - Truncated Path -> Alias: + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - - Stage: Stage-13 - Move Operator - files: - hdfs directory: true + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: from pcr_t1 insert overwrite table pcr_t2 select key, value where ds='2000-04-08' and key=2 diff --git ql/src/test/results/clientpositive/pointlookup2.q.out ql/src/test/results/clientpositive/pointlookup2.q.out index 158e88522f..c99a3223e4 100644 --- ql/src/test/results/clientpositive/pointlookup2.q.out +++ ql/src/test/results/clientpositive/pointlookup2.q.out @@ -136,7 +136,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -183,7 +183,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -334,7 +334,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -557,7 +557,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -604,7 +604,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -821,7 +821,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ 
-868,7 +868,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -913,7 +913,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"ds":"true","key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns ds,key,value @@ -934,7 +934,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"ds":"true","key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns ds,key,value @@ -1136,7 +1136,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1183,7 +1183,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1230,7 +1230,7 @@ STAGE PLANS: partition values: ds 2000-04-10 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1275,7 +1275,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"ds":"true","key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns ds,key,value @@ -1296,7 +1296,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"ds":"true","key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns ds,key,value diff --git ql/src/test/results/clientpositive/pointlookup3.q.out ql/src/test/results/clientpositive/pointlookup3.q.out index eb61e17cd9..0057d1df94 100644 --- ql/src/test/results/clientpositive/pointlookup3.q.out +++ ql/src/test/results/clientpositive/pointlookup3.q.out @@ -91,7 +91,7 @@ STAGE PLANS: ds1 2000-04-08 ds2 2001-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -139,7 +139,7 @@ STAGE PLANS: ds1 2000-04-09 ds2 2001-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -267,7 +267,7 @@ STAGE PLANS: ds1 2000-04-08 ds2 2001-04-08 properties: - 
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -418,7 +418,7 @@ STAGE PLANS: ds1 2000-04-08 ds2 2001-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -642,7 +642,7 @@ STAGE PLANS: ds1 2000-04-08 ds2 2001-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -690,7 +690,7 @@ STAGE PLANS: ds1 2000-04-09 ds2 2001-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -908,7 +908,7 @@ STAGE PLANS: ds1 2000-04-08 ds2 2001-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -956,7 +956,7 @@ STAGE PLANS: ds1 2000-04-09 ds2 2001-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1004,7 +1004,7 @@ STAGE PLANS: ds1 2000-04-10 ds2 2001-04-10 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git ql/src/test/results/clientpositive/pointlookup4.q.out ql/src/test/results/clientpositive/pointlookup4.q.out index 8ef5551369..3c9cc60903 100644 --- ql/src/test/results/clientpositive/pointlookup4.q.out +++ ql/src/test/results/clientpositive/pointlookup4.q.out @@ -91,7 +91,7 @@ STAGE PLANS: ds1 2000-04-08 ds2 2001-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -139,7 +139,7 @@ STAGE PLANS: ds1 2000-04-09 ds2 2001-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -267,7 +267,7 @@ STAGE PLANS: ds1 2000-04-08 ds2 2001-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -315,7 +315,7 @@ STAGE PLANS: ds1 2000-04-09 ds2 2001-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git ql/src/test/results/clientpositive/ppd_constant_expr.q.out ql/src/test/results/clientpositive/ppd_constant_expr.q.out index e118a4e8a2..f59ef81145 100644 --- ql/src/test/results/clientpositive/ppd_constant_expr.q.out +++ ql/src/test/results/clientpositive/ppd_constant_expr.q.out @@ -43,6 +43,32 @@ STAGE PLANS: output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.ppd_constant_expr + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: double) + outputColumnNames: c1, c2, c3 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -66,6 +92,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: c1, c2, c3 + Column Types: string, int, double + Table: default.ppd_constant_expr Stage: Stage-3 Map Reduce @@ -180,6 +210,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.ppd_constant_expr + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: double) + outputColumnNames: c1, c2, c3 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -203,6 +259,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: c1, c2, c3 + Column Types: string, int, double + Table: default.ppd_constant_expr Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/ppd_multi_insert.q.out 
ql/src/test/results/clientpositive/ppd_multi_insert.q.out index 60b17ecfbd..0618d343e4 100644 --- ql/src/test/results/clientpositive/ppd_multi_insert.q.out +++ ql/src/test/results/clientpositive/ppd_multi_insert.q.out @@ -47,11 +47,14 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-4 is a root stage Stage-0 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-0, Stage-6, Stage-8, Stage-10, Stage-3 + Stage-6 depends on stages: Stage-4 + Stage-7 depends on stages: Stage-1, Stage-6, Stage-8, Stage-10, Stage-3 + Stage-9 depends on stages: Stage-2, Stage-6, Stage-8, Stage-10, Stage-3 Stage-1 depends on stages: Stage-4 - Stage-6 depends on stages: Stage-1 + Stage-8 depends on stages: Stage-4 Stage-2 depends on stages: Stage-4 - Stage-7 depends on stages: Stage-2 + Stage-10 depends on stages: Stage-4 Stage-3 depends on stages: Stage-4 STAGE PLANS: @@ -105,6 +108,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.mi1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 183 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((_col0 < 200) and (_col0 >= 100)) (type: boolean) Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE @@ -120,6 +138,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.mi2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((_col0 < 300) and (_col0 >= 200)) (type: boolean) Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE @@ -135,6 +168,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.mi3 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll') + keys: '2008-04-08' (type: string), '12' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + File Output Operator + 
compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (_col0 >= 300) (type: boolean) Statistics: Num rows: 183 Data size: 1944 Basic stats: COMPLETE Column stats: NONE @@ -163,6 +212,48 @@ STAGE PLANS: Stage: Stage-5 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.mi1 + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-7 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.mi2 + + Stage: Stage-9 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key + Column Types: int + Table: default.mi3 Stage: Stage-1 Move Operator @@ -174,9 +265,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.mi2 - Stage: Stage-6 - Stats Work - Basic Stats Work: + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Move Operator @@ -191,9 +300,34 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.mi3 - Stage: Stage-7 - Stats Work - Basic Stats Work: + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: '2008-04-08' (type: string), '12' (type: string) + sort order: ++ + Map-reduce partition columns: '2008-04-08' (type: string), '12' (type: string) + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: '2008-04-08' (type: string), '12' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE + Select Operator + 
expressions: _col2 (type: struct), '2008-04-08' (type: string), '12' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Move Operator @@ -1315,11 +1449,14 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-4 is a root stage Stage-0 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-0, Stage-6, Stage-8, Stage-10, Stage-3 + Stage-6 depends on stages: Stage-4 + Stage-7 depends on stages: Stage-1, Stage-6, Stage-8, Stage-10, Stage-3 + Stage-9 depends on stages: Stage-2, Stage-6, Stage-8, Stage-10, Stage-3 Stage-1 depends on stages: Stage-4 - Stage-6 depends on stages: Stage-1 + Stage-8 depends on stages: Stage-4 Stage-2 depends on stages: Stage-4 - Stage-7 depends on stages: Stage-2 + Stage-10 depends on stages: Stage-4 Stage-3 depends on stages: Stage-4 STAGE PLANS: @@ -1373,6 +1510,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.mi1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 183 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((_col0 < 200) and (_col0 >= 100)) (type: boolean) Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE @@ -1388,6 +1540,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.mi2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((_col0 < 300) and (_col0 >= 200)) (type: boolean) Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE @@ -1403,6 +1570,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.mi3 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + 
Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll') + keys: '2008-04-08' (type: string), '12' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (_col0 >= 300) (type: boolean) Statistics: Num rows: 183 Data size: 1944 Basic stats: COMPLETE Column stats: NONE @@ -1431,6 +1614,48 @@ STAGE PLANS: Stage: Stage-5 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.mi1 + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-7 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.mi2 + + Stage: Stage-9 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key + Column Types: int + Table: default.mi3 Stage: Stage-1 Move Operator @@ -1442,9 +1667,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.mi2 - Stage: Stage-6 - Stats Work - Basic Stats Work: + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Move Operator @@ -1459,9 +1702,34 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.mi3 - Stage: Stage-7 - Stats Work - Basic Stats Work: + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: '2008-04-08' (type: string), '12' (type: string) + sort order: ++ + Map-reduce partition columns: '2008-04-08' (type: string), '12' (type: string) + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE 
Column stats: NONE + value expressions: _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: '2008-04-08' (type: string), '12' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), '2008-04-08' (type: string), '12' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Move Operator diff --git ql/src/test/results/clientpositive/push_or.q.out ql/src/test/results/clientpositive/push_or.q.out index dacdc40192..913fc6728b 100644 --- ql/src/test/results/clientpositive/push_or.q.out +++ ql/src/test/results/clientpositive/push_or.q.out @@ -73,7 +73,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -120,7 +120,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git ql/src/test/results/clientpositive/quote1.q.out ql/src/test/results/clientpositive/quote1.q.out index 4a4fbd0ab7..3ecc12a4bc 100644 --- ql/src/test/results/clientpositive/quote1.q.out +++ ql/src/test/results/clientpositive/quote1.q.out @@ -46,6 +46,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '2008-04-08' (type: string) + outputColumnNames: location, type, table + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(location, 'hll'), compute_stats(type, 'hll') + keys: table (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 27 Data size: 286 Basic stats: 
COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -71,6 +105,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: location, type + Column Types: int, string + Table: default.dest1 Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/rand_partitionpruner2.q.out ql/src/test/results/clientpositive/rand_partitionpruner2.q.out index 1482fde260..0e8ec13893 100644 --- ql/src/test/results/clientpositive/rand_partitionpruner2.q.out +++ ql/src/test/results/clientpositive/rand_partitionpruner2.q.out @@ -72,6 +72,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, hr, ds + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(hr, 'hll'), compute_stats(ds, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -174,6 +190,35 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [a] /srcpart/ds=2008-04-08/hr=12 [a] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -216,6 +261,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value, hr, ds + Column Types: string, string, string, string + Table: default.tmptable + Is Table Level Stats: true Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/rcfile_default_format.q.out ql/src/test/results/clientpositive/rcfile_default_format.q.out index 74f5d28621..7eed769042 100644 --- 
ql/src/test/results/clientpositive/rcfile_default_format.q.out +++ ql/src/test/results/clientpositive/rcfile_default_format.q.out @@ -118,7 +118,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} numFiles 1 numRows 500 rawDataSize 1406 diff --git ql/src/test/results/clientpositive/rcfile_null_value.q.out ql/src/test/results/clientpositive/rcfile_null_value.q.out index e2a8ae0c4e..2d2bef9be5 100644 --- ql/src/test/results/clientpositive/rcfile_null_value.q.out +++ ql/src/test/results/clientpositive/rcfile_null_value.q.out @@ -90,7 +90,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -149,6 +150,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.dest1_rc + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -163,6 +179,32 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: c1, c2, c3, c4 + Column Types: int, string, int, string + Table: default.dest1_rc + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM ( FROM diff --git ql/src/test/results/clientpositive/sample1.q.out ql/src/test/results/clientpositive/sample1.q.out index 72c32b4506..6d586585f5 100644 --- ql/src/test/results/clientpositive/sample1.q.out +++ ql/src/test/results/clientpositive/sample1.q.out @@ -74,6 +74,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string), 
'2008-04-08' (type: string), '11' (type: string) + outputColumnNames: key, value, dt, hr + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(dt, 'hll'), compute_stats(hr, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1744 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1744 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -127,6 +143,35 @@ STAGE PLANS: name: default.srcpart Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [s] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -169,6 +214,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value, dt, hr + Column Types: int, string, string, string + Table: default.dest1 + Is Table Level Stats: true Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/sample2.q.out ql/src/test/results/clientpositive/sample2.q.out index e09fd50225..164ce43f79 100644 --- ql/src/test/results/clientpositive/sample2.q.out +++ ql/src/test/results/clientpositive/sample2.q.out @@ -73,6 +73,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5301 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -126,6 +142,35 @@ STAGE PLANS: name: default.srcbucket Truncated Path -> Alias: /srcbucket/000000_0 
[s] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -168,6 +213,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + Is Table Level Stats: true Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/sample4.q.out ql/src/test/results/clientpositive/sample4.q.out index 46a1c6f425..ff97dd0d6c 100644 --- ql/src/test/results/clientpositive/sample4.q.out +++ ql/src/test/results/clientpositive/sample4.q.out @@ -73,6 +73,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5301 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -126,6 +142,35 @@ STAGE PLANS: name: default.srcbucket Truncated Path -> Alias: /srcbucket/000000_0 [s] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + 
MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -168,6 +213,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + Is Table Level Stats: true Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/sample5.q.out ql/src/test/results/clientpositive/sample5.q.out index 15e89d0d98..6d50ff5588 100644 --- ql/src/test/results/clientpositive/sample5.q.out +++ ql/src/test/results/clientpositive/sample5.q.out @@ -74,6 +74,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5301 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -127,6 +143,35 @@ STAGE PLANS: name: default.srcbucket Truncated Path -> Alias: /srcbucket [s] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -169,6 +214,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + Is Table Level Stats: true Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/sample6.q.out ql/src/test/results/clientpositive/sample6.q.out index c568bd2742..af8537741a 100644 --- ql/src/test/results/clientpositive/sample6.q.out +++ ql/src/test/results/clientpositive/sample6.q.out @@ -73,6 +73,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5301 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + 
Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -126,6 +142,35 @@ STAGE PLANS: name: default.srcbucket Truncated Path -> Alias: /srcbucket/000000_0 [s] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -168,6 +213,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + Is Table Level Stats: true Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/sample7.q.out ql/src/test/results/clientpositive/sample7.q.out index 6ad8ced7d1..f24f297ebc 100644 --- ql/src/test/results/clientpositive/sample7.q.out +++ ql/src/test/results/clientpositive/sample7.q.out @@ -74,6 +74,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -127,6 +143,35 @@ STAGE PLANS: name: default.srcbucket Truncated Path -> Alias: /srcbucket/000000_0 [s] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -169,6 +214,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + Is Table Level Stats: true Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/skewjoin.q.out ql/src/test/results/clientpositive/skewjoin.q.out index d349f98448..1225cca95b 100644 --- ql/src/test/results/clientpositive/skewjoin.q.out +++ ql/src/test/results/clientpositive/skewjoin.q.out @@ -80,11 +80,12 @@ INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-5 depends on stages: Stage-1 , consists of Stage-6, Stage-0 - Stage-6 - Stage-4 depends on stages: Stage-6 - Stage-0 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-1 , consists of Stage-7, Stage-0, Stage-3 + Stage-7 + Stage-5 depends on stages: Stage-7 + Stage-0 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-1 @@ -143,11 +144,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-5 + Stage: Stage-6 Conditional Operator - Stage: Stage-6 + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: 1 @@ -161,7 +177,7 @@ STAGE PLANS: 0 reducesinkkey0 (type: string) 1 reducesinkkey0 (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -184,6 +200,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + 
serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -200,6 +231,32 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest_j1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value diff --git ql/src/test/results/clientpositive/smb_mapjoin_11.q.out ql/src/test/results/clientpositive/smb_mapjoin_11.q.out index b76e7a8221..ef48acb653 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_11.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_11.q.out @@ -49,7 +49,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -93,7 +94,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 16 bucket_field_name key column.name.delimiter , @@ -172,6 +173,32 @@ STAGE PLANS: TotalFiles: 16 GatherStats: true MultiFileSpray: true + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: '1' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -205,6 +232,83 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 + Is Table Level Stats: false + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + key expressions: '1' (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: '1' (type: string) + tag: -1 + value expressions: _col1 (type: 
struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: '1' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), '1' (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT /*+ MAPJOIN(b) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key AND a.ds = '1' AND b.ds = '1' PREHOOK: type: QUERY @@ -1828,7 +1932,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 16 bucket_field_name key column.name.delimiter , @@ -1937,7 +2041,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 16 bucket_field_name key column.name.delimiter , diff --git ql/src/test/results/clientpositive/smb_mapjoin_12.q.out ql/src/test/results/clientpositive/smb_mapjoin_12.q.out index 6def3de813..0c1f4cf1f9 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_12.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_12.q.out @@ -129,7 +129,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 16 bucket_field_name key column.name.delimiter , @@ -207,6 +207,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked 
pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 + Is Table Level Stats: false PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT /*+ MAPJOIN(b) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key AND a.ds = '1' AND b.ds >= '1' PREHOOK: type: QUERY @@ -254,7 +259,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -299,7 +305,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 16 bucket_field_name key column.name.delimiter , @@ -379,6 +385,32 @@ STAGE PLANS: TotalFiles: 16 GatherStats: true MultiFileSpray: true + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: '2' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -413,6 +445,83 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 + Is Table Level Stats: false + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + key expressions: '2' (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: '2' (type: string) + tag: -1 + value expressions: _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By 
Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: '2' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), '2' (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '2') SELECT /*+mapjoin(b)*/ a.key, concat(a.value, b.value) FROM test_table3 a JOIN test_table1 b ON a.key = b.key AND a.ds = '1' AND b.ds='1' diff --git ql/src/test/results/clientpositive/smb_mapjoin_13.q.out ql/src/test/results/clientpositive/smb_mapjoin_13.q.out index 49ff6355b3..3bc00c8973 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_13.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_13.q.out @@ -112,7 +112,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -135,7 +135,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -292,7 +292,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -315,7 +315,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key diff --git ql/src/test/results/clientpositive/smb_mapjoin_20.q.out ql/src/test/results/clientpositive/smb_mapjoin_20.q.out index ac62af2966..f52a679022 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_20.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_20.q.out @@ -41,7 +41,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: 
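
The reshaped STAGE DEPENDENCIES blocks above all follow one pattern: enabling hive.stats.column.autogather splices an extra MapReduce stage (the column-stats aggregation) in after the insert, so the Stats Work stage now waits on both the Move stage and the new stats stage. As a rough illustration of the ordering semantics only (the StageDag class and executionOrder method are hypothetical names, and this is not Hive's actual Driver/TaskRunner logic), a stage list like the one above can be launched in dependency order with a plain topological sort:

import java.util.*;

// Illustrative sketch only: launches plan stages in an order consistent with
// "STAGE DEPENDENCIES" blocks such as "Stage-2 depends on stages: Stage-0, Stage-3".
// Not Hive's scheduler; just the dependency semantics the plans above imply.
public class StageDag {
  public static List<String> executionOrder(Map<String, List<String>> deps) {
    Map<String, Integer> remaining = new HashMap<>();       // unmet-dependency count
    Map<String, List<String>> dependents = new HashMap<>(); // reverse edges
    for (Map.Entry<String, List<String>> e : deps.entrySet()) {
      remaining.putIfAbsent(e.getKey(), 0);
      for (String d : e.getValue()) {
        remaining.merge(e.getKey(), 1, Integer::sum);
        remaining.putIfAbsent(d, 0);
        dependents.computeIfAbsent(d, k -> new ArrayList<>()).add(e.getKey());
      }
    }
    Deque<String> ready = new ArrayDeque<>();
    for (Map.Entry<String, Integer> e : remaining.entrySet())
      if (e.getValue() == 0) ready.add(e.getKey());         // root stages
    List<String> order = new ArrayList<>();
    while (!ready.isEmpty()) {
      String s = ready.poll();
      order.add(s);
      for (String dep : dependents.getOrDefault(s, List.of()))
        if (remaining.merge(dep, -1, Integer::sum) == 0) ready.add(dep);
    }
    return order;                                           // a valid launch order
  }

  public static void main(String[] args) {
    // The reshaped smb_mapjoin_20.q.out dependencies above: the column-stats
    // MapReduce stage (Stage-3) hangs off Stage-1, and Stats Work (Stage-2)
    // now waits for both the Move stage (Stage-0) and Stage-3.
    Map<String, List<String>> deps = new LinkedHashMap<>();
    deps.put("Stage-1", List.of());
    deps.put("Stage-0", List.of("Stage-1"));
    deps.put("Stage-2", List.of("Stage-0", "Stage-3"));
    deps.put("Stage-3", List.of("Stage-1"));
    System.out.println(executionOrder(deps)); // [Stage-1, Stage-0, Stage-3, Stage-2]
  }
}

Run against the smb_mapjoin_20 dependencies, it prints a launch order in which Stats Work runs last, which is why the golden files consistently show "Stage-2 depends on stages: Stage-0, Stage-3" after the flip.
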
diff --git ql/src/test/results/clientpositive/smb_mapjoin_20.q.out ql/src/test/results/clientpositive/smb_mapjoin_20.q.out
index ac62af2966..f52a679022 100644
--- ql/src/test/results/clientpositive/smb_mapjoin_20.q.out
+++ ql/src/test/results/clientpositive/smb_mapjoin_20.q.out
@@ -41,7 +41,8 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-1
@@ -73,6 +74,22 @@ STAGE PLANS:
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.test_table2
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+            outputColumnNames: key, value1, value2
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll')
+              keys: '1' (type: string)
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -89,6 +106,39 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value1, value2
+          Column Types: string, string, string
+          Table: default.test_table2
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: '1' (type: string)
+              sort order: +
+              Map-reduce partition columns: '1' (type: string)
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+          keys: '1' (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), '1' (type: string)
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
SELECT a.key, a.value, a.value FROM test_table1 a WHERE a.ds = '1'
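Each rewritten plan in this file has the same added shape: a Select Operator re-projects the inserted rows, a map-side Group By Operator in hash mode computes partial compute_stats(col, 'hll') aggregates keyed by the target partition value (the literal '1' here, because the partition spec is static), and the new Stage-3 merges the partials in mergepartial mode before Stage-2's Stats Work persists them to the metastore. The end state is roughly what an explicit column analyze would produce — shown only as a hedged point of comparison, since ANALYZE runs as its own job instead of piggybacking on the INSERT:

ANALYZE TABLE default.test_table2 PARTITION (ds = '1')
COMPUTE STATISTICS FOR COLUMNS;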
@@ -157,7 +207,8 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-1
@@ -189,6 +240,22 @@ STAGE PLANS:
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.test_table3
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+            outputColumnNames: key, value1, value2
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll')
+              keys: '1' (type: string)
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -205,6 +272,39 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value1, value2
+          Column Types: string, int, string
+          Table: default.test_table3
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: '1' (type: string)
+              sort order: +
+              Map-reduce partition columns: '1' (type: string)
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+          keys: '1' (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), '1' (type: string)
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
SELECT a.value, a.key, a.value FROM test_table1 a WHERE a.ds = '1'
@@ -1320,7 +1420,8 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-1
@@ -1352,6 +1453,22 @@ STAGE PLANS:
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.test_table2
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+            outputColumnNames: key, value1, value2
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll')
+              keys: '2' (type: string)
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -1368,4 +1485,37 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value1, value2
+          Column Types: string, string, string
+          Table: default.test_table2
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: '2' (type: string)
+              sort order: +
+              Map-reduce partition columns: '2' (type: string)
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+          keys: '2' (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), '2' (type: string)
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
diff --git ql/src/test/results/clientpositive/smb_mapjoin_21.q.out ql/src/test/results/clientpositive/smb_mapjoin_21.q.out
index 8b568eb258..1fb0c8ff91 100644
--- ql/src/test/results/clientpositive/smb_mapjoin_21.q.out
+++ ql/src/test/results/clientpositive/smb_mapjoin_21.q.out
@@ -78,6 +78,10 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.test_table2
PREHOOK: query: drop table test_table2
PREHOOK: type: DROPTABLE
@@ -108,7 +112,8 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-1
@@ -140,6 +145,22 @@ STAGE PLANS:
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.test_table2
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: string), '1' (type: string)
+            outputColumnNames: key, value, ds
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+              keys: ds (type: string)
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -156,6 +177,39 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.test_table2
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: string)
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col1 (type: struct), _col2 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string)
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
PREHOOK: query: drop table test_table2
PREHOOK: type: DROPTABLE
@@ -186,7 +240,8 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-1
@@ -217,6 +272,22 @@ STAGE PLANS:
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.test_table2
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: string), '1' (type: string)
+            outputColumnNames: key, value, ds
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+              keys: ds (type: string)
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -233,6 +304,39 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.test_table2
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: string)
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col1 (type: struct), _col2 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string)
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
PREHOOK: query: drop table test_table2
PREHOOK: type: DROPTABLE
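Unlike the static-partition plans in smb_mapjoin_20, the smb_mapjoin_21 variants key the stats aggregation on the ds column itself (keys: ds / KEY._col0) rather than on a constant: the same rewrite therefore covers dynamic-partition inserts, producing one merged stats row per partition value that arrives. A hedged sketch of a statement that would exercise the dynamic path — the query itself is illustrative, not quoted from the test:

INSERT OVERWRITE TABLE test_table2 PARTITION (ds)
SELECT a.key, a.value, a.ds FROM test_table1 a;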
@@ -263,7 +367,8 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-1
@@ -295,6 +400,22 @@ STAGE PLANS:
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.test_table2
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: string), '1' (type: string)
+            outputColumnNames: key, value, ds
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+              keys: ds (type: string)
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -311,6 +432,39 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.test_table2
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: string)
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col1 (type: struct), _col2 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string)
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
PREHOOK: query: drop table test_table2
PREHOOK: type: DROPTABLE
@@ -341,7 +495,8 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-1
@@ -373,6 +528,22 @@ STAGE PLANS:
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.test_table2
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: string), '1' (type: string)
+            outputColumnNames: key, value, ds
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+              keys: ds (type: string)
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -389,6 +560,39 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.test_table2
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: string)
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col1 (type: struct), _col2 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string)
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
PREHOOK: query: drop table test_table2
PREHOOK: type: DROPTABLE
@@ -419,7 +623,8 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-1
@@ -450,6 +655,22 @@ STAGE PLANS:
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.test_table2
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: string), '1' (type: string)
+            outputColumnNames: key, value, ds
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+              keys: ds (type: string)
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -466,4 +687,37 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.test_table2
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: string)
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col1 (type: struct), _col2 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string)
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
diff --git ql/src/test/results/clientpositive/smb_mapjoin_22.q.out ql/src/test/results/clientpositive/smb_mapjoin_22.q.out
index 256a3c26e0..5958232afa 100644
--- ql/src/test/results/clientpositive/smb_mapjoin_22.q.out
+++ ql/src/test/results/clientpositive/smb_mapjoin_22.q.out
@@ -74,6 +74,10 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.test_table2
PREHOOK: query: INSERT OVERWRITE TABLE test_table2
SELECT * FROM test_table1
@@ -215,6 +219,10 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.test_table2
PREHOOK: query: INSERT OVERWRITE TABLE test_table2
SELECT * FROM test_table1
diff --git ql/src/test/results/clientpositive/smb_mapjoin_7.q.out ql/src/test/results/clientpositive/smb_mapjoin_7.q.out
index a2a8660c3b..b71c5b87c1 100644
--- ql/src/test/results/clientpositive/smb_mapjoin_7.q.out
+++ ql/src/test/results/clientpositive/smb_mapjoin_7.q.out
@@ -648,6 +648,27 @@ STAGE PLANS:
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.smb_join_results
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string)
+            outputColumnNames: k1, v1, k2, v2
+            Group By Operator
+              aggregations: compute_stats(k1, 'hll'), compute_stats(v1, 'hll'), compute_stats(k2, 'hll'), compute_stats(v2, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Reduce Output Operator
+                sort order:
+                value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-8
     Conditional Operator
@@ -671,6 +692,10 @@ STAGE PLANS:
   Stage: Stage-3
     Stats Work
      Basic Stats Work:
+      Column Stats Desc:
+          Columns: k1, v1, k2, v2
+          Column Types: int, string, int, string
+          Table: default.smb_join_results
 
   Stage: Stage-4
     Map Reduce
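The smb_mapjoin_7 hunks show the unpartitioned variant of the same rewrite: the Group By Operator carries no keys, so a single reducer emits one merged statistics row covering k1, v1, k2, v2, and the Stats Work gets a table-level Column Stats Desc. compute_stats is an ordinary registered Hive UDAF, so the reduce side is evaluating roughly the following — an illustrative sketch only, since the real plan ships the result to the metastore rather than to the client:

SELECT compute_stats(k1, 'hll'), compute_stats(v1, 'hll'),
       compute_stats(k2, 'hll'), compute_stats(v2, 'hll')
FROM default.smb_join_results;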
diff --git ql/src/test/results/clientpositive/sort_merge_join_desc_5.q.out ql/src/test/results/clientpositive/sort_merge_join_desc_5.q.out
index 40c0ec5b59..5754a74478 100644
--- ql/src/test/results/clientpositive/sort_merge_join_desc_5.q.out
+++ ql/src/test/results/clientpositive/sort_merge_join_desc_5.q.out
@@ -101,7 +101,7 @@ STAGE PLANS:
             partition values:
               part 1
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
               bucket_count 1
               bucket_field_name key
               column.name.delimiter ,
diff --git ql/src/test/results/clientpositive/sort_merge_join_desc_6.q.out ql/src/test/results/clientpositive/sort_merge_join_desc_6.q.out
index 0740df3079..5dc2426fe4 100644
--- ql/src/test/results/clientpositive/sort_merge_join_desc_6.q.out
+++ ql/src/test/results/clientpositive/sort_merge_join_desc_6.q.out
@@ -75,7 +75,7 @@ STAGE PLANS:
             partition values:
               part 1
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
               bucket_count 2
               bucket_field_name key
               column.name.delimiter ,
@@ -184,7 +184,7 @@ STAGE PLANS:
             partition values:
               part 1
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
               bucket_count 2
               bucket_field_name key
               column.name.delimiter ,
diff --git ql/src/test/results/clientpositive/sort_merge_join_desc_7.q.out ql/src/test/results/clientpositive/sort_merge_join_desc_7.q.out
index fc5066c0f7..28501ab1d0 100644
--- ql/src/test/results/clientpositive/sort_merge_join_desc_7.q.out
+++ ql/src/test/results/clientpositive/sort_merge_join_desc_7.q.out
@@ -111,7 +111,7 @@ STAGE PLANS:
             partition values:
               part 1
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
               bucket_count 2
               bucket_field_name key
               column.name.delimiter ,
@@ -158,7 +158,7 @@ STAGE PLANS:
             partition values:
               part 2
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
               bucket_count 2
               bucket_field_name key
               column.name.delimiter ,
@@ -259,7 +259,7 @@ STAGE PLANS:
             partition values:
               part 1
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
               bucket_count 2
               bucket_field_name key
               column.name.delimiter ,
@@ -308,7 +308,7 @@ STAGE PLANS:
             partition values:
               part 2
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
               bucket_count 2
               bucket_field_name key
               column.name.delimiter ,
diff --git ql/src/test/results/clientpositive/stats0.q.out ql/src/test/results/clientpositive/stats0.q.out
index e993eeb663..dc77befb04 100644
--- ql/src/test/results/clientpositive/stats0.q.out
+++ ql/src/test/results/clientpositive/stats0.q.out
@@ -63,6 +63,22 @@ STAGE PLANS:
               TotalFiles: 1
               GatherStats: true
               MultiFileSpray: false
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: string)
+            outputColumnNames: key, value
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                null sort order:
+                sort order:
+                Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                tag: -1
+                value expressions: _col0 (type: struct), _col1 (type: struct)
+                auto parallelism: false
       Path -> Alias:
#### A masked pattern was here ####
      Path -> Partition:
@@ -114,6 +130,35 @@ STAGE PLANS:
             name: default.src
       Truncated Path -> Alias:
         /src [src]
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  columns _col0,_col1
+                  columns.types struct:struct
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.escape.crlf true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
 
   Stage: Stage-0
     Move Operator
@@ -147,6 +192,11 @@ STAGE PLANS:
     Stats Work
      Basic Stats Work:
#### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.stats_non_partitioned
+          Is Table Level Stats: true
PREHOOK: query: insert overwrite table stats_non_partitioned
select * from src
@@ -718,6 +768,40 @@ STAGE PLANS:
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.stats_partitioned
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: string), '1' (type: string)
+            outputColumnNames: key, value, ds
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+              keys: ds (type: string)
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col0 (type: string)
+                sort order: +
+                Map-reduce partition columns: _col0 (type: string)
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                value expressions: _col1 (type: struct), _col2 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string)
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Move Operator
@@ -734,6 +818,10 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.stats_partitioned
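For the partitioned insert into stats_partitioned, note how the constant partition value flows through the stats branch: the Select Operator projects '1' into the ds output column, the hash-mode Group By keys on it, and the final Select re-emits the partition value last so each stats row lines up as (column stats..., partition key). The plan above can be reproduced directly from the statement under test with a plain EXPLAIN (sketch):

EXPLAIN
INSERT OVERWRITE TABLE stats_partitioned PARTITION (ds = '1')
SELECT * FROM src;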
PREHOOK: query: insert overwrite table stats_partitioned partition (ds='1')
select * from src
@@ -1380,6 +1468,22 @@ STAGE PLANS:
               TotalFiles: 1
               GatherStats: true
               MultiFileSpray: false
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: string)
+            outputColumnNames: key, value
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                null sort order:
+                sort order:
+                Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                tag: -1
+                value expressions: _col0 (type: struct), _col1 (type: struct)
+                auto parallelism: false
       Path -> Alias:
#### A masked pattern was here ####
      Path -> Partition:
@@ -1431,6 +1535,35 @@ STAGE PLANS:
             name: default.src
       Truncated Path -> Alias:
         /src [src]
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  columns _col0,_col1
+                  columns.types struct:struct
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.escape.crlf true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
 
   Stage: Stage-7
     Conditional Operator
@@ -1473,6 +1606,11 @@ STAGE PLANS:
     Stats Work
      Basic Stats Work:
#### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.stats_non_partitioned
+          Is Table Level Stats: true
 
   Stage: Stage-3
     Map Reduce
@@ -2229,6 +2367,40 @@ STAGE PLANS:
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.stats_partitioned
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: string), '1' (type: string)
+            outputColumnNames: key, value, ds
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+              keys: ds (type: string)
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col0 (type: string)
+                sort order: +
+                Map-reduce partition columns: _col0 (type: string)
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                value expressions: _col1 (type: struct), _col2 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string)
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-7
     Conditional Operator
@@ -2254,6 +2426,10 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.stats_partitioned
 
   Stage: Stage-3
     Map Reduce
diff --git ql/src/test/results/clientpositive/stats1.q.out ql/src/test/results/clientpositive/stats1.q.out
index 2675ea5b7e..461d27ee73 100644
--- ql/src/test/results/clientpositive/stats1.q.out
+++ ql/src/test/results/clientpositive/stats1.q.out
@@ -75,6 +75,19 @@ STAGE PLANS:
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.tmptable
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: string)
+            outputColumnNames: key, value
+            Statistics: Num rows: 26 Data size: 7072 Basic stats: COMPLETE Column stats: PARTIAL
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL
+              Reduce Output Operator
+                sort order:
+                Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL
+                value expressions: _col0 (type: struct), _col1 (type: struct)
           TableScan
             alias: s2
             Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
@@ -92,6 +105,32 @@ STAGE PLANS:
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.tmptable
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: string)
+            outputColumnNames: key, value
+            Statistics: Num rows: 26 Data size: 7072 Basic stats: COMPLETE Column stats: PARTIAL
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL
+              Reduce Output Operator
+                sort order:
+                Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL
+                value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Move Operator
@@ -106,6 +145,10 @@ STAGE PLANS:
   Stage: Stage-3
     Stats Work
      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.tmptable
PREHOOK: query: INSERT OVERWRITE TABLE tmptable
SELECT unionsrc.key, unionsrc.value
@@ -178,7 +221,7 @@ Retention: 0
#### A masked pattern was here ####
 Table Type: MANAGED_TABLE
 Table Parameters:
-	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\"}
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
 	numFiles	2
 	numRows	26
 	rawDataSize	199
diff --git ql/src/test/results/clientpositive/stats10.q.out ql/src/test/results/clientpositive/stats10.q.out
index acd482bccc..75261cb97b 100644
--- ql/src/test/results/clientpositive/stats10.q.out
+++ ql/src/test/results/clientpositive/stats10.q.out
@@ -17,7 +17,8 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-1
@@ -48,6 +49,22 @@ STAGE PLANS:
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.bucket3_1
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: string), '1' (type: string)
+            outputColumnNames: key, value, ds
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+              keys: ds (type: string)
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -64,6 +81,39 @@ STAGE PLANS:
   Stage: Stage-2
     Stats Work
      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.bucket3_1
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: string)
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col1 (type: struct), _col2 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string)
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
PREHOOK: query: insert overwrite table bucket3_1 partition (ds='1')
select * from src
@@ -414,7 +464,7 @@ Database: default
 Table: bucket3_1
#### A masked pattern was here ####
 Partition Parameters:
-	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\"}
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 500 rawDataSize 5312 @@ -451,7 +501,7 @@ Database: default Table: bucket3_1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 500 rawDataSize 5312 diff --git ql/src/test/results/clientpositive/stats14.q.out ql/src/test/results/clientpositive/stats14.q.out index eb8a995e29..faebe8afd8 100644 --- ql/src/test/results/clientpositive/stats14.q.out +++ ql/src/test/results/clientpositive/stats14.q.out @@ -41,7 +41,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -181,7 +181,7 @@ Database: default Table: stats_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -219,7 +219,7 @@ Database: default Table: stats_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 diff --git ql/src/test/results/clientpositive/stats15.q.out ql/src/test/results/clientpositive/stats15.q.out index eb8a995e29..faebe8afd8 100644 --- ql/src/test/results/clientpositive/stats15.q.out +++ ql/src/test/results/clientpositive/stats15.q.out @@ -41,7 +41,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -181,7 +181,7 @@ Database: default Table: stats_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -219,7 +219,7 @@ Database: default Table: stats_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 diff --git ql/src/test/results/clientpositive/stats18.q.out ql/src/test/results/clientpositive/stats18.q.out index dd54c0dbc3..b947f62650 100644 --- ql/src/test/results/clientpositive/stats18.q.out +++ ql/src/test/results/clientpositive/stats18.q.out @@ -37,7 +37,7 @@ Database: default Table: stats_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 diff --git ql/src/test/results/clientpositive/stats4.q.out ql/src/test/results/clientpositive/stats4.q.out index 741e47b948..a1c8dbfd06 100644 --- ql/src/test/results/clientpositive/stats4.q.out +++ 
+++ ql/src/test/results/clientpositive/stats4.q.out
@@ -47,17 +47,13 @@ STAGE DEPENDENCIES:
   Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6
   Stage-5
   Stage-0 depends on stages: Stage-5, Stage-4, Stage-7
-  Stage-3 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-0, Stage-10
   Stage-4
   Stage-6
   Stage-7 depends on stages: Stage-6
-  Stage-14 depends on stages: Stage-2 , consists of Stage-11, Stage-10, Stage-12
-  Stage-11
-  Stage-1 depends on stages: Stage-11, Stage-10, Stage-13
-  Stage-9 depends on stages: Stage-1
-  Stage-10
-  Stage-12
-  Stage-13 depends on stages: Stage-12
+  Stage-1 depends on stages: Stage-2
+  Stage-9 depends on stages: Stage-1, Stage-10
+  Stage-10 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-2
@@ -81,6 +77,22 @@ STAGE PLANS:
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.nzhang_part1
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
+            outputColumnNames: key, value, ds, hr
+            Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+              keys: ds (type: string), hr (type: string)
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col0 (type: string), _col1 (type: string)
+                sort order: ++
+                Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE
+                value expressions: _col2 (type: struct), _col3 (type: struct)
           Filter Operator
             predicate: (ds > '2008-04-08') (type: boolean)
            Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE
@@ -96,6 +108,40 @@ STAGE PLANS:
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.nzhang_part2
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+            outputColumnNames: key, value, hr
+            Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+              keys: '2008-12-31' (type: string), hr (type: string)
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          keys: KEY._col0 (type: string), KEY._col1 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string)
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-8
     Conditional Operator
@@ -122,6 +168,10 @@ STAGE PLANS:
   Stage: Stage-3
     Stats Work
      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.nzhang_part1
 
   Stage: Stage-4
     Map Reduce
@@ -153,15 +203,6 @@ STAGE PLANS:
           hdfs directory: true
#### A masked pattern was here ####
 
-  Stage: Stage-14
-    Conditional Operator
-
-  Stage: Stage-11
-    Move Operator
-      files:
-          hdfs directory: true
-#### A masked pattern was here ####
-
   Stage: Stage-1
     Move Operator
       tables:
@@ -178,36 +219,39 @@ STAGE PLANS:
   Stage: Stage-9
     Stats Work
      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.nzhang_part2
 
   Stage: Stage-10
     Map Reduce
       Map Operator Tree:
           TableScan
+            Reduce Output Operator
+              key expressions: '2008-12-31' (type: string), _col1 (type: string)
+              sort order: ++
+              Map-reduce partition columns: '2008-12-31' (type: string), _col1 (type: string)
+              Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col2 (type: struct), _col3 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          keys: '2008-12-31' (type: string), KEY._col1 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col2 (type: struct), _col3 (type: struct), '2008-12-31' (type: string), _col1 (type: string)
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
             File Output Operator
               compressed: false
+              Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
               table:
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                  name: default.nzhang_part2
-
-  Stage: Stage-12
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                  name: default.nzhang_part2
-
-  Stage: Stage-13
-    Move Operator
-      files:
-          hdfs directory: true
-#### A masked pattern was here ####
PREHOOK: query: from srcpart
insert overwrite table nzhang_part1 partition (ds, hr) select key, value, ds, hr where ds <= '2008-04-08'
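stats4 is the multi-insert case: one scan of srcpart feeds two INSERT targets, and each target now owns its own stats pipeline — nzhang_part1 keyed on its dynamic (ds, hr) partition columns, nzhang_part2 on the constant '2008-12-31' plus hr — while the old Stage-11 through Stage-14 merge scaffolding for the second target disappears and Stage-10 is repurposed as its stats merge job. The first branch of the statement is quoted in the PREHOOK line above; the second branch below is reconstructed from the plan (the '2008-12-31' key and the ds > '2008-04-08' filter), so treat it as a sketch:

FROM srcpart
INSERT OVERWRITE TABLE nzhang_part1 PARTITION (ds, hr)
  SELECT key, value, ds, hr WHERE ds <= '2008-04-08'
INSERT OVERWRITE TABLE nzhang_part2 PARTITION (ds = '2008-12-31', hr)
  SELECT key, value, hr WHERE ds > '2008-04-08';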
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -2381,7 +2425,7 @@ Database: default Table: nzhang_part2 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -2419,7 +2463,7 @@ Database: default Table: nzhang_part2 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 diff --git ql/src/test/results/clientpositive/stats_empty_dyn_part.q.out ql/src/test/results/clientpositive/stats_empty_dyn_part.q.out index 54566546bb..185df2d0c7 100644 --- ql/src/test/results/clientpositive/stats_empty_dyn_part.q.out +++ ql/src/test/results/clientpositive/stats_empty_dyn_part.q.out @@ -42,6 +42,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, part + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll') + keys: part (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -67,6 +101,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key + Column Types: string + Table: default.tmptable Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/stats_empty_partition.q.out ql/src/test/results/clientpositive/stats_empty_partition.q.out index b1ddac310c..b679334c07 100644 --- ql/src/test/results/clientpositive/stats_empty_partition.q.out +++ ql/src/test/results/clientpositive/stats_empty_partition.q.out @@ -36,7 +36,7 @@ Database: default Table: tmptable #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 0 rawDataSize 0 diff --git 
diff --git ql/src/test/results/clientpositive/stats_list_bucket.q.out ql/src/test/results/clientpositive/stats_list_bucket.q.out
index f6eb7993ea..369c5b8410 100644
--- ql/src/test/results/clientpositive/stats_list_bucket.q.out
+++ ql/src/test/results/clientpositive/stats_list_bucket.q.out
@@ -59,7 +59,7 @@ Database: default
 Table: stats_list_bucket
#### A masked pattern was here ####
 Partition Parameters:
-	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\"}
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"c1\":\"true\",\"c2\":\"true\"}}
 	numFiles	4
 	numRows	500
 	rawDataSize	4812
@@ -130,7 +130,7 @@ Retention: 0
#### A masked pattern was here ####
 Table Type: MANAGED_TABLE
 Table Parameters:
-	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\"}
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"c1\":\"true\",\"c2\":\"true\"}}
 	numFiles	4
 	numRows	500
 	rawDataSize	4812
diff --git ql/src/test/results/clientpositive/stats_partial_size.q.out ql/src/test/results/clientpositive/stats_partial_size.q.out
index 09129f2410..2838d93dda 100644
--- ql/src/test/results/clientpositive/stats_partial_size.q.out
+++ ql/src/test/results/clientpositive/stats_partial_size.q.out
@@ -49,16 +49,16 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: sample_partitioned
-            Statistics: Num rows: 2 Data size: 18 Basic stats: COMPLETE Column stats: PARTIAL
+            Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
             Select Operator
               expressions: x (type: int), y (type: int)
               outputColumnNames: _col0, _col1
-              Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: PARTIAL
+              Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
               Reduce Output Operator
                 key expressions: _col1 (type: int)
                 sort order: +
                 Map-reduce partition columns: _col1 (type: int)
-                Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: PARTIAL
+                Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
                 value expressions: _col0 (type: int)
           TableScan
             alias: sample
@@ -83,10 +83,10 @@ STAGE PLANS:
             0 _col1 (type: int)
             1 _col0 (type: int)
           outputColumnNames: _col0
-          Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: PARTIAL
+          Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
           File Output Operator
            compressed: false
-            Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: PARTIAL
+            Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
            table:
                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
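stats_partial_size is the one file in this batch where the optimizer's own estimates move rather than just the stored metadata: with column statistics available for x and y, the TableScan is costed at Column stats: COMPLETE, and the estimated size tightens from 18 to 16 bytes — two rows times two 4-byte int columns, instead of a padded heuristic. The fixture is shaped roughly like the DDL below, reconstructed from the plan (the partition column name is assumed):

CREATE TABLE sample_partitioned (x INT, y INT)
PARTITIONED BY (p STRING);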
Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -414,7 +414,7 @@ Database: default Table: t1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -452,7 +452,7 @@ Database: default Table: t1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 diff --git ql/src/test/results/clientpositive/subquery_multiinsert.q.out ql/src/test/results/clientpositive/subquery_multiinsert.q.out index 7abb6b293d..381550a006 100644 --- ql/src/test/results/clientpositive/subquery_multiinsert.q.out +++ ql/src/test/results/clientpositive/subquery_multiinsert.q.out @@ -58,18 +58,19 @@ INSERT OVERWRITE TABLE src_5 order by key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-10 is a root stage - Stage-2 depends on stages: Stage-10 + Stage-11 is a root stage + Stage-2 depends on stages: Stage-11 Stage-3 depends on stages: Stage-2 Stage-4 depends on stages: Stage-3 Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-1, Stage-8 Stage-6 depends on stages: Stage-2 Stage-0 depends on stages: Stage-6 - Stage-7 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-0, Stage-8 + Stage-8 depends on stages: Stage-6 STAGE PLANS: - Stage: Stage-10 + Stage: Stage-11 Map Reduce Map Operator Tree: TableScan @@ -221,6 +222,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_5 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 275 Data size: 5396 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-1 Move Operator @@ -235,6 +256,10 @@ STAGE PLANS: Stage: Stage-5 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_5 Stage: Stage-6 Map Reduce @@ -282,6 +307,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_4 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + 
Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -296,6 +336,32 @@ STAGE PLANS: Stage: Stage-7 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_4 + + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Warning: Shuffle Join JOIN[31][tables = [b, sq_2_notin_nullcheck]] in Stage 'Stage-2:MAPRED' is a cross product PREHOOK: query: from src b @@ -334,12 +400,13 @@ POSTHOOK: Lineage: src_4.key SIMPLE [(src)b.FieldSchema(name:key, type:string, c POSTHOOK: Lineage: src_4.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_5.key SIMPLE [(src)b.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: src_5.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] -RUN: Stage-10:MAPRED +RUN: Stage-11:MAPRED RUN: Stage-2:MAPRED RUN: Stage-3:MAPRED RUN: Stage-6:MAPRED RUN: Stage-4:MAPRED RUN: Stage-0:MOVE +RUN: Stage-8:MAPRED RUN: Stage-1:MOVE RUN: Stage-7:STATS RUN: Stage-5:STATS @@ -489,7 +556,7 @@ POSTHOOK: Input: default@src_5 199 val_199 199 val_199 2 val_2 -Warning: Map Join MAPJOIN[56][bigTable=b] in task 'Stage-13:MAPRED' is a cross product +Warning: Map Join MAPJOIN[56][bigTable=b] in task 'Stage-14:MAPRED' is a cross product Warning: Shuffle Join JOIN[31][tables = [b, sq_2_notin_nullcheck]] in Stage 'Stage-2:MAPRED' is a cross product PREHOOK: query: explain from src b @@ -520,22 +587,23 @@ INSERT OVERWRITE TABLE src_5 order by key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-10 is a root stage - Stage-14 depends on stages: Stage-10 , consists of Stage-17, Stage-2 - Stage-17 has a backup stage: Stage-2 - Stage-13 depends on stages: Stage-17 - Stage-15 depends on stages: Stage-2, Stage-13 - Stage-4 depends on stages: Stage-15 + Stage-11 is a root stage + Stage-15 depends on stages: Stage-11 , consists of Stage-18, Stage-2 + Stage-18 has a backup stage: Stage-2 + Stage-14 depends on stages: Stage-18 + Stage-16 depends on stages: Stage-2, Stage-14 + Stage-4 depends on stages: Stage-16 Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 - Stage-16 depends on stages: Stage-2, Stage-13 - Stage-12 depends on stages: 
Stage-16 - Stage-0 depends on stages: Stage-12 - Stage-7 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-1, Stage-8 + Stage-17 depends on stages: Stage-2, Stage-14 + Stage-13 depends on stages: Stage-17 + Stage-0 depends on stages: Stage-13 + Stage-7 depends on stages: Stage-0, Stage-8 + Stage-8 depends on stages: Stage-13 Stage-2 STAGE PLANS: - Stage: Stage-10 + Stage: Stage-11 Map Reduce Map Operator Tree: TableScan @@ -578,10 +646,10 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-14 + Stage: Stage-15 Conditional Operator - Stage: Stage-17 + Stage: Stage-18 Map Reduce Local Work Alias -> Map Local Tables: $INTNAME @@ -595,7 +663,7 @@ STAGE PLANS: 0 1 - Stage: Stage-13 + Stage: Stage-14 Map Reduce Map Operator Tree: TableScan @@ -627,7 +695,7 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-15 + Stage: Stage-16 Map Reduce Local Work Alias -> Map Local Tables: sq_2:s1 @@ -689,6 +757,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_5 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 275 Data size: 5396 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-1 Move Operator @@ -703,8 +791,12 @@ STAGE PLANS: Stage: Stage-5 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_5 - Stage: Stage-16 + Stage: Stage-17 Map Reduce Local Work Alias -> Map Local Tables: sq_1:a @@ -732,7 +824,7 @@ STAGE PLANS: 0 key (type: string), value (type: string) 1 _col0 (type: string), _col1 (type: string) - Stage: Stage-12 + Stage: Stage-13 Map Reduce Map Operator Tree: TableScan @@ -752,6 +844,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_4 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -768,6 
+875,32 @@ STAGE PLANS: Stage: Stage-7 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_4 + + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Map Reduce @@ -808,7 +941,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe -Warning: Map Join MAPJOIN[56][bigTable=b] in task 'Stage-13:MAPRED' is a cross product +Warning: Map Join MAPJOIN[56][bigTable=b] in task 'Stage-14:MAPRED' is a cross product Warning: Shuffle Join JOIN[31][tables = [b, sq_2_notin_nullcheck]] in Stage 'Stage-2:MAPRED' is a cross product PREHOOK: query: from src b INSERT OVERWRITE TABLE src_4 @@ -846,16 +979,17 @@ POSTHOOK: Lineage: src_4.key SIMPLE [(src)b.FieldSchema(name:key, type:string, c POSTHOOK: Lineage: src_4.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_5.key SIMPLE [(src)b.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: src_5.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] -RUN: Stage-10:MAPRED -RUN: Stage-14:CONDITIONAL -RUN: Stage-17:MAPREDLOCAL -RUN: Stage-13:MAPRED -RUN: Stage-15:MAPREDLOCAL +RUN: Stage-11:MAPRED +RUN: Stage-15:CONDITIONAL +RUN: Stage-18:MAPREDLOCAL +RUN: Stage-14:MAPRED RUN: Stage-16:MAPREDLOCAL +RUN: Stage-17:MAPREDLOCAL RUN: Stage-4:MAPRED -RUN: Stage-12:MAPRED +RUN: Stage-13:MAPRED RUN: Stage-1:MOVE RUN: Stage-0:MOVE +RUN: Stage-8:MAPRED RUN: Stage-5:STATS RUN: Stage-7:STATS PREHOOK: query: select * from src_4 diff --git ql/src/test/results/clientpositive/tez/acid_vectorization_original_tez.q.out ql/src/test/results/clientpositive/tez/acid_vectorization_original_tez.q.out index f10af8c907..31e85f8a74 100644 --- ql/src/test/results/clientpositive/tez/acid_vectorization_original_tez.q.out +++ ql/src/test/results/clientpositive/tez/acid_vectorization_original_tez.q.out @@ -427,18 +427,18 @@ STAGE PLANS: TableScan alias: over10k_orc_bucketed filterExpr: ((b = 4294967363) and (t < 100)) (type: boolean) - Statistics: Num rows: 2098 Data size: 39900 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2098 Data size: 41920 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((b = 4294967363) and (t < 100)) (type: boolean) - Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), si (type: smallint), i (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Statistics: 
Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int) sort order: +++ - Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Reducer 2 Execution mode: vectorized @@ -446,10 +446,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey2 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -496,18 +496,18 @@ STAGE PLANS: TableScan alias: over10k_orc_bucketed filterExpr: ((b = 4294967363) and (t < 100)) (type: boolean) - Statistics: Num rows: 2098 Data size: 39900 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2098 Data size: 41920 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((b = 4294967363) and (t < 100)) (type: boolean) - Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ROW__ID (type: struct), t (type: tinyint), si (type: smallint), i (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: struct) sort order: + - Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: tinyint), _col2 (type: smallint), _col3 (type: int) Execution mode: vectorized Reducer 2 @@ -516,10 +516,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -568,19 +568,19 @@ STAGE PLANS: TableScan alias: over10k_orc_bucketed filterExpr: ((b = 4294967363) and (t < 100)) (type: boolean) - Statistics: Num rows: 2098 Data size: 1021440 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2098 Data size: 706986 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((b = 4294967363) and (t < 100)) (type: boolean) - Statistics: Num rows: 1 Data size: 
486 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 674 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ROW__ID (type: struct), t (type: tinyint), si (type: smallint), f (type: float), d (type: double), bo (type: boolean), s (type: string), ts (type: timestamp), dec (type: decimal(4,2)), bin (type: binary) outputColumnNames: _col0, _col1, _col2, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 1 Data size: 486 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 834 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: struct) sort order: + Map-reduce partition columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 1 Data size: 486 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 834 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: tinyint), _col2 (type: smallint), _col5 (type: float), _col6 (type: double), _col7 (type: boolean), _col8 (type: string), _col9 (type: timestamp), _col10 (type: decimal(4,2)), _col11 (type: binary) Execution mode: vectorized Reducer 2 @@ -589,10 +589,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), 0 (type: int), 4294967363 (type: bigint), VALUE._col3 (type: float), VALUE._col4 (type: double), VALUE._col5 (type: boolean), VALUE._col6 (type: string), VALUE._col7 (type: timestamp), VALUE._col8 (type: decimal(4,2)), VALUE._col9 (type: binary) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 1 Data size: 486 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 834 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 486 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 834 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat diff --git ql/src/test/results/clientpositive/tez/explainanalyze_1.q.out ql/src/test/results/clientpositive/tez/explainanalyze_1.q.out index a02a57c00b..a5813d8c7f 100644 --- ql/src/test/results/clientpositive/tez/explainanalyze_1.q.out +++ ql/src/test/results/clientpositive/tez/explainanalyze_1.q.out @@ -120,6 +120,9 @@ POSTHOOK: query: explain analyze insert overwrite table t select key from src POSTHOOK: type: QUERY Plan optimized by CBO. 
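The new stages in the plans above pair a map-side Group By Operator in mode: hash with a reduce-side one in mode: mergepartial, shuffled with an empty sort order (PARTITION_ONLY_SHUFFLE / CUSTOM_SIMPLE_EDGE) so that a single reducer merges every partial. A minimal conceptual sketch of that partial/merge pattern follows, with a HashSet standing in for the 'hll' (HyperLogLog) NDV sketch that the real compute_stats aggregation uses; all names here are illustrative, not Hive classes:

import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

// Conceptual sketch of the hash -> mergepartial pattern visible in the plans above:
// each map task folds its rows into a partial stats object, the empty-sort-order
// shuffle routes all partials to one reducer, and the reducer merges them.
public class ComputeStatsSketch {
    static final class Partial {
        long rows, nulls;
        Set<String> distinct = new HashSet<>();

        void add(String v) {                 // mode: hash (map side)
            rows++;
            if (v == null) nulls++; else distinct.add(v);
        }

        void merge(Partial o) {              // mode: mergepartial (reduce side)
            rows += o.rows;
            nulls += o.nulls;
            distinct.addAll(o.distinct);
        }
    }

    public static void main(String[] args) {
        List<List<String>> splits = Arrays.asList(
                Arrays.asList("a", "b", null),
                Arrays.asList("b", "c"));

        Partial merged = new Partial();
        for (List<String> split : splits) {  // one Partial per map task
            Partial p = new Partial();
            split.forEach(p::add);
            merged.merge(p);                 // single reducer merges all partials
        }
        System.out.printf("rows=%d nulls=%d ndv=%d%n",
                merged.rows, merged.nulls, merged.distinct.size());
        // prints: rows=5 nulls=1 ndv=3
    }
}

Merging partials rather than raw rows is why the extra stage stays cheap: each Reduce Output Operator above ships one struct per column (Num rows: 1) instead of re-shuffling the inserted data.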
+Vertex dependency in root stage +Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Stage-3 Stats Work{} Stage-0 @@ -128,13 +131,21 @@ Stage-3 Stage-2 Dependency Collection{} Stage-1 - Map 1 - File Output Operator [FS_2] - table:{"name:":"default.t"} - Select Operator [SEL_1] (rows=500/500 width=87) - Output:["_col0"] - TableScan [TS_0] (rows=500/500 width=87) - default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + Reducer 2 + File Output Operator [FS_5] + Group By Operator [GBY_3] (rows=1/1 width=440) + Output:["_col0"],aggregations:["compute_stats(VALUE._col0, 'hll')"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] + File Output Operator [FS_2] + table:{"name:":"default.t"} + Select Operator [SEL_1] (rows=500/500 width=87) + Output:["_col0"] + TableScan [TS_0] (rows=500/500 width=87) + default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + PARTITION_ONLY_SHUFFLE [RS_2] + Select Operator [SEL_1] (rows=500/500 width=87) + Output:["key"] + Please refer to the previous Select Operator [SEL_1] PREHOOK: query: select key from src limit 10 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/tez/explainanalyze_2.q.out ql/src/test/results/clientpositive/tez/explainanalyze_2.q.out index e5b1d74654..eaeff5a603 100644 --- ql/src/test/results/clientpositive/tez/explainanalyze_2.q.out +++ ql/src/test/results/clientpositive/tez/explainanalyze_2.q.out @@ -629,20 +629,20 @@ Stage-0 Stage-1 Map 1 File Output Operator [FS_10] - Merge Join Operator [MERGEJOIN_15] (rows=253/480 width=190) + Merge Join Operator [MERGEJOIN_15] (rows=382/480 width=95) Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col0","_col1"] - <-Select Operator [SEL_5] (rows=230/242 width=14) + <-Select Operator [SEL_5] (rows=242/242 width=4) Output:["_col0"] - Filter Operator [FIL_14] (rows=230/242 width=14) + Filter Operator [FIL_14] (rows=242/242 width=4) predicate:key is not null - TableScan [TS_3] (rows=242/242 width=14) - default@tab,s3,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Select Operator [SEL_2] (rows=230/242 width=190) + TableScan [TS_3] (rows=242/242 width=4) + default@tab,s3,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <-Select Operator [SEL_2] (rows=242/242 width=95) Output:["_col0","_col1"] - Filter Operator [FIL_13] (rows=230/242 width=190) + Filter Operator [FIL_13] (rows=242/242 width=95) predicate:key is not null - TableScan [TS_0] (rows=242/242 width=190) - default@tab,s1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + TableScan [TS_0] (rows=242/242 width=95) + default@tab,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] PREHOOK: query: select s1.key as key, s1.value as value from tab s1 join tab s3 on s1.key=s3.key join tab s2 on s1.value=s2.value PREHOOK: type: QUERY @@ -671,36 +671,34 @@ Stage-0 Stage-1 Reducer 2 File Output Operator [FS_16] - Select Operator [SEL_15] (rows=278/1166 width=3) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_27] (rows=278/1166 width=3) - Conds:RS_12._col2=RS_13._col1(Inner),Output:["_col1","_col2"] - <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_12] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_25] (rows=253/480 width=3) - Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col1","_col2"] - <-Select Operator [SEL_5] (rows=218/242 width=179) - Output:["_col0","_col1"] - Filter Operator [FIL_23] (rows=218/242 width=179) - predicate:(key is not null and value is not null) - TableScan [TS_3] (rows=242/242 width=179) - default@tab,s1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Select Operator [SEL_2] (rows=230/242 width=3) - Output:["_col0"] - Filter Operator [FIL_22] 
(rows=230/242 width=3) - predicate:key is not null - TableScan [TS_0] (rows=242/242 width=3) - default@tab,s3,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Map 4 [SIMPLE_EDGE] - SHUFFLE [RS_13] - PartitionCols:_col1 - Select Operator [SEL_8] (rows=230/242 width=175) - Output:["_col1"] - Filter Operator [FIL_24] (rows=230/242 width=175) - predicate:value is not null - TableScan [TS_6] (rows=242/242 width=175) - default@tab,s2,Tbl:COMPLETE,Col:NONE,Output:["value"] + Merge Join Operator [MERGEJOIN_27] (rows=604/1166 width=95) + Conds:RS_12._col1=RS_13._col1(Inner),Output:["_col0","_col1"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_12] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_25] (rows=382/480 width=95) + Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col0","_col1"] + <-Select Operator [SEL_5] (rows=242/242 width=4) + Output:["_col0"] + Filter Operator [FIL_23] (rows=242/242 width=4) + predicate:key is not null + TableScan [TS_3] (rows=242/242 width=4) + default@tab,s3,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <-Select Operator [SEL_2] (rows=242/242 width=95) + Output:["_col0","_col1"] + Filter Operator [FIL_22] (rows=242/242 width=95) + predicate:(key is not null and value is not null) + TableScan [TS_0] (rows=242/242 width=95) + default@tab,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Map 4 [SIMPLE_EDGE] + SHUFFLE [RS_13] + PartitionCols:_col1 + Select Operator [SEL_8] (rows=242/242 width=91) + Output:["_col1"] + Filter Operator [FIL_24] (rows=242/242 width=91) + predicate:value is not null + TableScan [TS_6] (rows=242/242 width=91) + default@tab,s2,Tbl:COMPLETE,Col:COMPLETE,Output:["value"] PREHOOK: query: select s1.key as key, s1.value as value from tab s1 join tab2 s3 on s1.key=s3.key PREHOOK: type: QUERY @@ -730,20 +728,20 @@ Stage-0 Stage-1 Map 1 File Output Operator [FS_10] - Merge Join Operator [MERGEJOIN_15] (rows=253/480 width=190) + Merge Join Operator [MERGEJOIN_15] (rows=382/480 width=95) Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col0","_col1"] - <-Select Operator [SEL_5] (rows=230/242 width=14) + <-Select Operator [SEL_5] (rows=242/242 width=4) Output:["_col0"] - Filter Operator [FIL_14] (rows=230/242 width=14) + Filter Operator [FIL_14] (rows=242/242 width=4) predicate:key is not null - TableScan [TS_3] (rows=242/242 width=14) - default@tab2,s3,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Select Operator [SEL_2] (rows=230/242 width=190) + TableScan [TS_3] (rows=242/242 width=4) + default@tab2,s3,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <-Select Operator [SEL_2] (rows=242/242 width=95) Output:["_col0","_col1"] - Filter Operator [FIL_13] (rows=230/242 width=190) + Filter Operator [FIL_13] (rows=242/242 width=95) predicate:key is not null - TableScan [TS_0] (rows=242/242 width=190) - default@tab,s1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + TableScan [TS_0] (rows=242/242 width=95) + default@tab,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] PREHOOK: query: select s1.key as key, s1.value as value from tab s1 join tab2 s3 on s1.key=s3.key join tab2 s2 on s1.value=s2.value PREHOOK: type: QUERY @@ -776,36 +774,34 @@ Stage-0 Stage-1 Reducer 2 File Output Operator [FS_16] - Select Operator [SEL_15] (rows=278/1166 width=3) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_27] (rows=278/1166 width=3) - Conds:RS_12._col2=RS_13._col1(Inner),Output:["_col1","_col2"] - <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_12] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_25] (rows=253/480 width=3) - Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col1","_col2"] - 
<-Select Operator [SEL_5] (rows=218/242 width=179) - Output:["_col0","_col1"] - Filter Operator [FIL_23] (rows=218/242 width=179) - predicate:(key is not null and value is not null) - TableScan [TS_3] (rows=242/242 width=179) - default@tab,s1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Select Operator [SEL_2] (rows=230/242 width=3) - Output:["_col0"] - Filter Operator [FIL_22] (rows=230/242 width=3) - predicate:key is not null - TableScan [TS_0] (rows=242/242 width=3) - default@tab2,s3,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Map 4 [SIMPLE_EDGE] - SHUFFLE [RS_13] - PartitionCols:_col1 - Select Operator [SEL_8] (rows=230/242 width=175) - Output:["_col1"] - Filter Operator [FIL_24] (rows=230/242 width=175) - predicate:value is not null - TableScan [TS_6] (rows=242/242 width=175) - default@tab2,s2,Tbl:COMPLETE,Col:NONE,Output:["value"] + Merge Join Operator [MERGEJOIN_27] (rows=604/1166 width=95) + Conds:RS_12._col1=RS_13._col1(Inner),Output:["_col0","_col1"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_12] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_25] (rows=382/480 width=95) + Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col0","_col1"] + <-Select Operator [SEL_5] (rows=242/242 width=4) + Output:["_col0"] + Filter Operator [FIL_23] (rows=242/242 width=4) + predicate:key is not null + TableScan [TS_3] (rows=242/242 width=4) + default@tab2,s3,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <-Select Operator [SEL_2] (rows=242/242 width=95) + Output:["_col0","_col1"] + Filter Operator [FIL_22] (rows=242/242 width=95) + predicate:(key is not null and value is not null) + TableScan [TS_0] (rows=242/242 width=95) + default@tab,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Map 4 [SIMPLE_EDGE] + SHUFFLE [RS_13] + PartitionCols:_col1 + Select Operator [SEL_8] (rows=242/242 width=91) + Output:["_col1"] + Filter Operator [FIL_24] (rows=242/242 width=91) + predicate:value is not null + TableScan [TS_6] (rows=242/242 width=91) + default@tab2,s2,Tbl:COMPLETE,Col:COMPLETE,Output:["value"] PREHOOK: query: select count(*) from (select s1.key as key, s1.value as value from tab s1 join tab s3 on s1.key=s3.key UNION ALL @@ -857,43 +853,43 @@ Stage-0 Output:["_col0"],aggregations:["count()"] <-Reducer 3 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_22] - Merge Join Operator [MERGEJOIN_38] (rows=531/1646 width=3) + Merge Join Operator [MERGEJOIN_38] (rows=1009/1646 width=8) Conds:Union 2._col0=RS_19._col0(Inner) <-Map 7 [SIMPLE_EDGE] SHUFFLE [RS_19] PartitionCols:_col0 - Select Operator [SEL_17] (rows=475/500 width=3) + Select Operator [SEL_17] (rows=500/500 width=4) Output:["_col0"] - Filter Operator [FIL_35] (rows=475/500 width=3) + Filter Operator [FIL_35] (rows=500/500 width=4) predicate:key is not null - TableScan [TS_15] (rows=500/500 width=3) - default@tab_part,b,Tbl:COMPLETE,Col:NONE,Output:["key"] + TableScan [TS_15] (rows=500/500 width=4) + default@tab_part,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] <-Union 2 [SIMPLE_EDGE] <-Map 1 [CONTAINS] Reduce Output Operator [RS_18] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_36] (rows=253/480 width=3) + Merge Join Operator [MERGEJOIN_36] (rows=382/480 width=4) Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col0"] - <-Select Operator [SEL_5] (rows=230/242 width=3) + <-Select Operator [SEL_5] (rows=242/242 width=4) Output:["_col0"] - Filter Operator [FIL_33] (rows=230/242 width=3) + Filter Operator [FIL_33] (rows=242/242 width=4) predicate:key is not null - TableScan [TS_3] (rows=242/242 width=3) - 
default@tab,s3,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Select Operator [SEL_2] (rows=230/242 width=3) + TableScan [TS_3] (rows=242/242 width=4) + default@tab,s3,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <-Select Operator [SEL_2] (rows=242/242 width=4) Output:["_col0"] - Filter Operator [FIL_32] (rows=230/242 width=3) + Filter Operator [FIL_32] (rows=242/242 width=4) predicate:key is not null - TableScan [TS_0] (rows=242/242 width=3) + TableScan [TS_0] (rows=242/242 width=4) Output:["key"] <-Map 6 [CONTAINS] Reduce Output Operator [RS_18] PartitionCols:_col0 - Select Operator [SEL_12] (rows=230/242 width=3) + Select Operator [SEL_12] (rows=242/242 width=4) Output:["_col0"] - Filter Operator [FIL_34] (rows=230/242 width=3) + Filter Operator [FIL_34] (rows=242/242 width=4) predicate:key is not null - TableScan [TS_10] (rows=242/242 width=3) + TableScan [TS_10] (rows=242/242 width=4) Output:["key"] PREHOOK: query: select count(*) from (select s1.key as key, s1.value as value from tab s1 join tab s3 on s1.key=s3.key join tab s2 on s1.value=s2.value @@ -946,60 +942,58 @@ Stage-0 Output:["_col0"],aggregations:["count()"] <-Reducer 4 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_28] - Merge Join Operator [MERGEJOIN_50] (rows=558/3768 width=3) + Merge Join Operator [MERGEJOIN_50] (rows=1368/3768 width=8) Conds:Union 3._col0=RS_25._col0(Inner) <-Map 9 [SIMPLE_EDGE] SHUFFLE [RS_25] PartitionCols:_col0 - Select Operator [SEL_23] (rows=475/500 width=3) + Select Operator [SEL_23] (rows=500/500 width=4) Output:["_col0"] - Filter Operator [FIL_46] (rows=475/500 width=3) + Filter Operator [FIL_46] (rows=500/500 width=4) predicate:key is not null - TableScan [TS_21] (rows=500/500 width=3) - default@tab_part,b,Tbl:COMPLETE,Col:NONE,Output:["key"] + TableScan [TS_21] (rows=500/500 width=4) + default@tab_part,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] <-Union 3 [SIMPLE_EDGE] <-Map 8 [CONTAINS] Reduce Output Operator [RS_24] PartitionCols:_col0 - Select Operator [SEL_18] (rows=230/242 width=3) + Select Operator [SEL_18] (rows=242/242 width=4) Output:["_col0"] - Filter Operator [FIL_45] (rows=230/242 width=3) + Filter Operator [FIL_45] (rows=242/242 width=4) predicate:key is not null - TableScan [TS_16] (rows=242/242 width=3) + TableScan [TS_16] (rows=242/242 width=4) Output:["key"] <-Reducer 2 [CONTAINS] Reduce Output Operator [RS_24] PartitionCols:_col0 - Select Operator [SEL_15] (rows=278/1166 width=3) - Output:["_col0"] - Merge Join Operator [MERGEJOIN_49] (rows=278/1166 width=3) - Conds:RS_12._col2=RS_13._col1(Inner),Output:["_col1"] - <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_12] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_47] (rows=253/480 width=3) - Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col1","_col2"] - <-Select Operator [SEL_5] (rows=218/242 width=179) - Output:["_col0","_col1"] - Filter Operator [FIL_43] (rows=218/242 width=179) - predicate:(key is not null and value is not null) - TableScan [TS_3] (rows=242/242 width=179) - default@tab,s1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Select Operator [SEL_2] (rows=230/242 width=3) - Output:["_col0"] - Filter Operator [FIL_42] (rows=230/242 width=3) - predicate:key is not null - TableScan [TS_0] (rows=242/242 width=3) - default@tab,s3,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Map 7 [SIMPLE_EDGE] - SHUFFLE [RS_13] - PartitionCols:_col1 - Select Operator [SEL_8] (rows=230/242 width=175) - Output:["_col1"] - Filter Operator [FIL_44] (rows=230/242 width=175) - predicate:value is not null - TableScan [TS_6] (rows=242/242 width=175) - 
default@tab,s2,Tbl:COMPLETE,Col:NONE,Output:["value"] + Merge Join Operator [MERGEJOIN_49] (rows=604/1166 width=4) + Conds:RS_12._col1=RS_13._col1(Inner),Output:["_col0"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_12] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_47] (rows=382/480 width=95) + Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col0","_col1"] + <-Select Operator [SEL_5] (rows=242/242 width=4) + Output:["_col0"] + Filter Operator [FIL_43] (rows=242/242 width=4) + predicate:key is not null + TableScan [TS_3] (rows=242/242 width=4) + default@tab,s3,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <-Select Operator [SEL_2] (rows=242/242 width=95) + Output:["_col0","_col1"] + Filter Operator [FIL_42] (rows=242/242 width=95) + predicate:(key is not null and value is not null) + TableScan [TS_0] (rows=242/242 width=95) + default@tab,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Map 7 [SIMPLE_EDGE] + SHUFFLE [RS_13] + PartitionCols:_col1 + Select Operator [SEL_8] (rows=242/242 width=91) + Output:["_col1"] + Filter Operator [FIL_44] (rows=242/242 width=91) + predicate:value is not null + TableScan [TS_6] (rows=242/242 width=91) + default@tab,s2,Tbl:COMPLETE,Col:COMPLETE,Output:["value"] PREHOOK: query: CREATE TABLE a(key STRING, value STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE @@ -1113,20 +1107,23 @@ Plan optimized by CBO. Vertex dependency in root stage Map 1 <- Union 2 (CONTAINS) -Map 13 <- Union 14 (CONTAINS) -Map 15 <- Union 14 (CONTAINS) -Map 16 <- Union 14 (CONTAINS) -Map 19 <- Union 20 (CONTAINS) -Map 21 <- Union 20 (CONTAINS) -Map 22 <- Union 20 (CONTAINS) -Map 23 <- Union 20 (CONTAINS) -Map 6 <- Union 2 (CONTAINS) -Reducer 11 <- Map 10 (SIMPLE_EDGE), Union 14 (SIMPLE_EDGE) -Reducer 12 <- Map 17 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE), Union 5 (CONTAINS) -Reducer 3 <- Map 7 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE) -Reducer 4 <- Map 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) -Reducer 8 <- Map 18 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) -Reducer 9 <- Reducer 8 (SIMPLE_EDGE), Union 20 (SIMPLE_EDGE), Union 5 (CONTAINS) +Map 16 <- Union 17 (CONTAINS) +Map 18 <- Union 17 (CONTAINS) +Map 19 <- Union 17 (CONTAINS) +Map 22 <- Union 23 (CONTAINS) +Map 24 <- Union 23 (CONTAINS) +Map 25 <- Union 23 (CONTAINS) +Map 26 <- Union 23 (CONTAINS) +Map 9 <- Union 2 (CONTAINS) +Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) +Reducer 12 <- Reducer 11 (SIMPLE_EDGE), Union 23 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 14 <- Map 13 (SIMPLE_EDGE), Union 17 (SIMPLE_EDGE) +Reducer 15 <- Map 20 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 3 <- Map 10 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE) +Reducer 4 <- Map 13 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 6 <- Union 5 (CUSTOM_SIMPLE_EDGE) +Reducer 7 <- Union 5 (CUSTOM_SIMPLE_EDGE) +Reducer 8 <- Union 5 (CUSTOM_SIMPLE_EDGE) Stage-5 Stats Work{} @@ -1136,193 +1133,243 @@ Stage-5 Stage-4 Dependency Collection{} Stage-3 - Union 5 - <-Reducer 12 [CONTAINS] - File Output Operator [FS_75] - table:{"name:":"default.a"} - Select Operator [SEL_44] (rows=2682/5421 width=178) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_122] (rows=2682/5421 width=178) - Conds:RS_41._col1=RS_42._col0(Inner),Output:["_col1","_col4"] - <-Map 17 [SIMPLE_EDGE] - SHUFFLE [RS_42] - PartitionCols:_col0 - Select Operator [SEL_37] (rows=500/500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_112] (rows=500/500 width=178) - predicate:key is not null - TableScan [TS_35] (rows=500/500 
width=178) - default@src,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_41] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_121] (rows=1658/2097 width=87) - Conds:Union 14._col0=RS_39._col1(Inner),Output:["_col1"] - <-Map 10 [SIMPLE_EDGE] - SHUFFLE [RS_39] + Reducer 6 + File Output Operator [FS_5] + Group By Operator [GBY_3] (rows=1/1 width=880) + Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')"] + <-Union 5 [CUSTOM_SIMPLE_EDGE] + <-Reducer 12 [CONTAINS] + File Output Operator [FS_75] + table:{"name:":"default.a"} + Select Operator [SEL_72] (rows=192/820 width=175) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_124] (rows=192/820 width=175) + Conds:RS_69._col1=Union 23._col0(Inner),Output:["_col0","_col3"] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_69] PartitionCols:_col1 - Select Operator [SEL_34] (rows=500/500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_111] (rows=500/500 width=178) - predicate:(key is not null and value is not null) - TableScan [TS_11] (rows=500/500 width=178) - default@src,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Union 14 [SIMPLE_EDGE] - <-Map 13 [CONTAINS] - Reduce Output Operator [RS_38] + Merge Join Operator [MERGEJOIN_123] (rows=39/115 width=264) + Conds:RS_66._col0=RS_67._col0(Inner),Output:["_col0","_col1","_col3"] + <-Map 10 [SIMPLE_EDGE] + SHUFFLE [RS_66] + PartitionCols:_col0 + Select Operator [SEL_10] (rows=25/25 width=175) + Output:["_col0","_col1"] + Filter Operator [FIL_106] (rows=25/25 width=175) + predicate:(key is not null and value is not null) + TableScan [TS_8] (rows=25/25 width=175) + default@src1,x,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Map 21 [SIMPLE_EDGE] + SHUFFLE [RS_67] + PartitionCols:_col0 + Select Operator [SEL_51] (rows=25/25 width=175) + Output:["_col0","_col1"] + Filter Operator [FIL_114] (rows=25/25 width=175) + predicate:key is not null + TableScan [TS_49] (rows=25/25 width=175) + default@src1,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Union 23 [SIMPLE_EDGE] + <-Map 22 [CONTAINS] + Reduce Output Operator [RS_70] PartitionCols:_col0 - Select Operator [SEL_23] (rows=25/25 width=89) + Select Operator [SEL_54] (rows=25/25 width=89) Output:["_col0"] - Filter Operator [FIL_108] (rows=25/25 width=89) + Filter Operator [FIL_115] (rows=25/25 width=89) predicate:value is not null - TableScan [TS_21] (rows=25/25 width=89) + TableScan [TS_52] (rows=25/25 width=89) Output:["value"] - <-Map 15 [CONTAINS] - Reduce Output Operator [RS_38] + <-Map 24 [CONTAINS] + Reduce Output Operator [RS_70] PartitionCols:_col0 - Select Operator [SEL_26] (rows=500/500 width=91) + Select Operator [SEL_57] (rows=500/500 width=91) Output:["_col0"] - Filter Operator [FIL_109] (rows=500/500 width=91) + Filter Operator [FIL_116] (rows=500/500 width=91) predicate:value is not null - TableScan [TS_24] (rows=500/500 width=91) + TableScan [TS_55] (rows=500/500 width=91) Output:["value"] - <-Map 16 [CONTAINS] - Reduce Output Operator [RS_38] + <-Map 25 [CONTAINS] + Reduce Output Operator [RS_70] PartitionCols:_col0 - Select Operator [SEL_30] (rows=500/500 width=91) + Select Operator [SEL_61] (rows=500/500 width=91) Output:["_col0"] - Filter Operator [FIL_110] (rows=500/500 width=91) + Filter Operator [FIL_117] (rows=500/500 width=91) predicate:value is not null - TableScan [TS_28] (rows=500/500 width=91) + TableScan [TS_59] (rows=500/500 width=91) Output:["value"] - File Output Operator [FS_77] 
- table:{"name:":"default.b"} - Please refer to the previous Select Operator [SEL_44] - File Output Operator [FS_79] - table:{"name:":"default.c"} - Please refer to the previous Select Operator [SEL_44] - <-Reducer 4 [CONTAINS] - File Output Operator [FS_75] - table:{"name:":"default.a"} - Select Operator [SEL_20] (rows=67/170 width=177) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_120] (rows=67/170 width=177) - Conds:RS_17._col1=RS_18._col0(Inner),Output:["_col1","_col4"] - <-Map 10 [SIMPLE_EDGE] - SHUFFLE [RS_18] - PartitionCols:_col0 - Select Operator [SEL_13] (rows=500/500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_107] (rows=500/500 width=178) - predicate:key is not null - Please refer to the previous TableScan [TS_11] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_17] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_119] (rows=42/108 width=86) - Conds:Union 2._col0=RS_15._col1(Inner),Output:["_col1"] - <-Map 7 [SIMPLE_EDGE] - SHUFFLE [RS_15] - PartitionCols:_col1 - Select Operator [SEL_10] (rows=25/25 width=175) - Output:["_col0","_col1"] - Filter Operator [FIL_106] (rows=25/25 width=175) - predicate:(key is not null and value is not null) - TableScan [TS_8] (rows=25/25 width=175) - default@src1,x,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Union 2 [SIMPLE_EDGE] - <-Map 1 [CONTAINS] - Reduce Output Operator [RS_14] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=25/25 width=89) - Output:["_col0"] - Filter Operator [FIL_104] (rows=25/25 width=89) - predicate:value is not null - TableScan [TS_0] (rows=25/25 width=89) - Output:["value"] - <-Map 6 [CONTAINS] - Reduce Output Operator [RS_14] + <-Map 26 [CONTAINS] + Reduce Output Operator [RS_70] PartitionCols:_col0 - Select Operator [SEL_5] (rows=500/500 width=91) + Select Operator [SEL_64] (rows=500/500 width=91) Output:["_col0"] - Filter Operator [FIL_105] (rows=500/500 width=91) + Filter Operator [FIL_118] (rows=500/500 width=91) predicate:value is not null - TableScan [TS_3] (rows=500/500 width=91) + TableScan [TS_62] (rows=500/500 width=91) Output:["value"] - File Output Operator [FS_77] - table:{"name:":"default.b"} - Please refer to the previous Select Operator [SEL_20] - File Output Operator [FS_79] - table:{"name:":"default.c"} - Please refer to the previous Select Operator [SEL_20] - <-Reducer 9 [CONTAINS] - File Output Operator [FS_75] - table:{"name:":"default.a"} - Select Operator [SEL_72] (rows=192/820 width=175) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_124] (rows=192/820 width=175) - Conds:RS_69._col1=Union 20._col0(Inner),Output:["_col0","_col3"] - <-Reducer 8 [SIMPLE_EDGE] - SHUFFLE [RS_69] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_123] (rows=39/115 width=264) - Conds:RS_66._col0=RS_67._col0(Inner),Output:["_col0","_col1","_col3"] - <-Map 7 [SIMPLE_EDGE] - SHUFFLE [RS_66] + Reduce Output Operator [RS_2] + Select Operator [SEL_1] (rows=2941/6411 width=178) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_72] + File Output Operator [FS_77] + table:{"name:":"default.b"} + Please refer to the previous Select Operator [SEL_72] + Reduce Output Operator [RS_2] + Select Operator [SEL_1] (rows=2941/6411 width=178) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_72] + File Output Operator [FS_79] + table:{"name:":"default.c"} + Please refer to the previous Select Operator [SEL_72] + Reduce Output Operator [RS_2] + Select Operator [SEL_1] (rows=2941/6411 width=178) + Output:["key","value"] + 
Please refer to the previous Select Operator [SEL_72] + <-Reducer 15 [CONTAINS] + File Output Operator [FS_75] + table:{"name:":"default.a"} + Select Operator [SEL_44] (rows=2682/5421 width=178) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_122] (rows=2682/5421 width=178) + Conds:RS_41._col1=RS_42._col0(Inner),Output:["_col1","_col4"] + <-Map 20 [SIMPLE_EDGE] + SHUFFLE [RS_42] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_10] - <-Map 18 [SIMPLE_EDGE] - SHUFFLE [RS_67] + Select Operator [SEL_37] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_112] (rows=500/500 width=178) + predicate:key is not null + TableScan [TS_35] (rows=500/500 width=178) + default@src,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_41] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_121] (rows=1658/2097 width=87) + Conds:Union 17._col0=RS_39._col1(Inner),Output:["_col1"] + <-Map 13 [SIMPLE_EDGE] + SHUFFLE [RS_39] + PartitionCols:_col1 + Select Operator [SEL_34] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_111] (rows=500/500 width=178) + predicate:(key is not null and value is not null) + TableScan [TS_11] (rows=500/500 width=178) + default@src,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Union 17 [SIMPLE_EDGE] + <-Map 16 [CONTAINS] + Reduce Output Operator [RS_38] + PartitionCols:_col0 + Select Operator [SEL_23] (rows=25/25 width=89) + Output:["_col0"] + Filter Operator [FIL_108] (rows=25/25 width=89) + predicate:value is not null + TableScan [TS_21] (rows=25/25 width=89) + Output:["value"] + <-Map 18 [CONTAINS] + Reduce Output Operator [RS_38] + PartitionCols:_col0 + Select Operator [SEL_26] (rows=500/500 width=91) + Output:["_col0"] + Filter Operator [FIL_109] (rows=500/500 width=91) + predicate:value is not null + TableScan [TS_24] (rows=500/500 width=91) + Output:["value"] + <-Map 19 [CONTAINS] + Reduce Output Operator [RS_38] + PartitionCols:_col0 + Select Operator [SEL_30] (rows=500/500 width=91) + Output:["_col0"] + Filter Operator [FIL_110] (rows=500/500 width=91) + predicate:value is not null + TableScan [TS_28] (rows=500/500 width=91) + Output:["value"] + Reduce Output Operator [RS_2] + Select Operator [SEL_1] (rows=2941/6411 width=178) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_44] + File Output Operator [FS_77] + table:{"name:":"default.b"} + Please refer to the previous Select Operator [SEL_44] + Reduce Output Operator [RS_2] + Select Operator [SEL_1] (rows=2941/6411 width=178) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_44] + File Output Operator [FS_79] + table:{"name:":"default.c"} + Please refer to the previous Select Operator [SEL_44] + Reduce Output Operator [RS_2] + Select Operator [SEL_1] (rows=2941/6411 width=178) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_44] + <-Reducer 4 [CONTAINS] + File Output Operator [FS_75] + table:{"name:":"default.a"} + Select Operator [SEL_20] (rows=67/170 width=177) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_120] (rows=67/170 width=177) + Conds:RS_17._col1=RS_18._col0(Inner),Output:["_col1","_col4"] + <-Map 13 [SIMPLE_EDGE] + SHUFFLE [RS_18] PartitionCols:_col0 - Select Operator [SEL_51] (rows=25/25 width=175) + Select Operator [SEL_13] (rows=500/500 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_114] (rows=25/25 width=175) + Filter Operator [FIL_107] (rows=500/500 width=178) 
predicate:key is not null - TableScan [TS_49] (rows=25/25 width=175) - default@src1,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Union 20 [SIMPLE_EDGE] - <-Map 19 [CONTAINS] - Reduce Output Operator [RS_70] - PartitionCols:_col0 - Select Operator [SEL_54] (rows=25/25 width=89) - Output:["_col0"] - Filter Operator [FIL_115] (rows=25/25 width=89) - predicate:value is not null - TableScan [TS_52] (rows=25/25 width=89) - Output:["value"] - <-Map 21 [CONTAINS] - Reduce Output Operator [RS_70] - PartitionCols:_col0 - Select Operator [SEL_57] (rows=500/500 width=91) - Output:["_col0"] - Filter Operator [FIL_116] (rows=500/500 width=91) - predicate:value is not null - TableScan [TS_55] (rows=500/500 width=91) - Output:["value"] - <-Map 22 [CONTAINS] - Reduce Output Operator [RS_70] - PartitionCols:_col0 - Select Operator [SEL_61] (rows=500/500 width=91) - Output:["_col0"] - Filter Operator [FIL_117] (rows=500/500 width=91) - predicate:value is not null - TableScan [TS_59] (rows=500/500 width=91) - Output:["value"] - <-Map 23 [CONTAINS] - Reduce Output Operator [RS_70] - PartitionCols:_col0 - Select Operator [SEL_64] (rows=500/500 width=91) - Output:["_col0"] - Filter Operator [FIL_118] (rows=500/500 width=91) - predicate:value is not null - TableScan [TS_62] (rows=500/500 width=91) - Output:["value"] - File Output Operator [FS_77] - table:{"name:":"default.b"} - Please refer to the previous Select Operator [SEL_72] - File Output Operator [FS_79] - table:{"name:":"default.c"} - Please refer to the previous Select Operator [SEL_72] + Please refer to the previous TableScan [TS_11] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_17] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_119] (rows=42/108 width=86) + Conds:Union 2._col0=RS_15._col1(Inner),Output:["_col1"] + <-Map 10 [SIMPLE_EDGE] + SHUFFLE [RS_15] + PartitionCols:_col1 + Please refer to the previous Select Operator [SEL_10] + <-Union 2 [SIMPLE_EDGE] + <-Map 1 [CONTAINS] + Reduce Output Operator [RS_14] + PartitionCols:_col0 + Select Operator [SEL_2] (rows=25/25 width=89) + Output:["_col0"] + Filter Operator [FIL_104] (rows=25/25 width=89) + predicate:value is not null + TableScan [TS_0] (rows=25/25 width=89) + Output:["value"] + <-Map 9 [CONTAINS] + Reduce Output Operator [RS_14] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=500/500 width=91) + Output:["_col0"] + Filter Operator [FIL_105] (rows=500/500 width=91) + predicate:value is not null + TableScan [TS_3] (rows=500/500 width=91) + Output:["value"] + Reduce Output Operator [RS_2] + Select Operator [SEL_1] (rows=2941/6411 width=178) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_20] + File Output Operator [FS_77] + table:{"name:":"default.b"} + Please refer to the previous Select Operator [SEL_20] + Reduce Output Operator [RS_2] + Select Operator [SEL_1] (rows=2941/6411 width=178) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_20] + File Output Operator [FS_79] + table:{"name:":"default.c"} + Please refer to the previous Select Operator [SEL_20] + Reduce Output Operator [RS_2] + Select Operator [SEL_1] (rows=2941/6411 width=178) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_20] + Reducer 7 + File Output Operator [FS_5] + Group By Operator [GBY_3] (rows=1/1 width=880) + Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')"] + <- Please refer to the previous Union 5 [CUSTOM_SIMPLE_EDGE] + Reducer 8 + File Output Operator [FS_5] 
+ Group By Operator [GBY_3] (rows=1/1 width=880) + Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')"] + <- Please refer to the previous Union 5 [CUSTOM_SIMPLE_EDGE] Stage-6 Stats Work{} Stage-1 @@ -1412,26 +1459,29 @@ Plan optimized by CBO. Vertex dependency in root stage Map 1 <- Union 2 (CONTAINS) -Map 10 <- Union 2 (CONTAINS) -Map 17 <- Union 18 (CONTAINS) -Map 22 <- Union 18 (CONTAINS) -Map 23 <- Union 20 (CONTAINS) -Map 26 <- Union 27 (CONTAINS) -Map 33 <- Union 27 (CONTAINS) -Map 34 <- Union 29 (CONTAINS) -Map 35 <- Union 31 (CONTAINS) -Reducer 12 <- Map 11 (SIMPLE_EDGE), Map 25 (SIMPLE_EDGE) -Reducer 13 <- Reducer 12 (SIMPLE_EDGE), Reducer 32 (SIMPLE_EDGE), Union 8 (CONTAINS) -Reducer 15 <- Map 14 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE) -Reducer 16 <- Map 24 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE), Union 6 (CONTAINS) -Reducer 19 <- Union 18 (SIMPLE_EDGE), Union 20 (CONTAINS) -Reducer 21 <- Union 20 (SIMPLE_EDGE) -Reducer 28 <- Union 27 (SIMPLE_EDGE), Union 29 (CONTAINS) +Map 13 <- Union 2 (CONTAINS) +Map 20 <- Union 21 (CONTAINS) +Map 25 <- Union 21 (CONTAINS) +Map 26 <- Union 23 (CONTAINS) +Map 29 <- Union 30 (CONTAINS) +Map 36 <- Union 30 (CONTAINS) +Map 37 <- Union 32 (CONTAINS) +Map 38 <- Union 34 (CONTAINS) +Reducer 10 <- Reducer 9 (CUSTOM_SIMPLE_EDGE) +Reducer 11 <- Reducer 9 (CUSTOM_SIMPLE_EDGE) +Reducer 12 <- Reducer 9 (CUSTOM_SIMPLE_EDGE) +Reducer 15 <- Map 14 (SIMPLE_EDGE), Map 28 (SIMPLE_EDGE) +Reducer 16 <- Reducer 15 (SIMPLE_EDGE), Reducer 35 (SIMPLE_EDGE), Union 8 (CONTAINS) +Reducer 18 <- Map 17 (SIMPLE_EDGE), Reducer 24 (SIMPLE_EDGE) +Reducer 19 <- Map 27 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE), Union 6 (CONTAINS) +Reducer 22 <- Union 21 (SIMPLE_EDGE), Union 23 (CONTAINS) +Reducer 24 <- Union 23 (SIMPLE_EDGE) Reducer 3 <- Union 2 (SIMPLE_EDGE) -Reducer 30 <- Union 29 (SIMPLE_EDGE), Union 31 (CONTAINS) -Reducer 32 <- Union 31 (SIMPLE_EDGE) -Reducer 4 <- Map 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 14 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE), Union 6 (CONTAINS) +Reducer 31 <- Union 30 (SIMPLE_EDGE), Union 32 (CONTAINS) +Reducer 33 <- Union 32 (SIMPLE_EDGE), Union 34 (CONTAINS) +Reducer 35 <- Union 34 (SIMPLE_EDGE) +Reducer 4 <- Map 14 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Map 17 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE), Union 6 (CONTAINS) Reducer 7 <- Union 6 (SIMPLE_EDGE), Union 8 (CONTAINS) Reducer 9 <- Union 8 (SIMPLE_EDGE) @@ -1443,237 +1493,254 @@ Stage-5 Stage-4 Dependency Collection{} Stage-3 - Reducer 9 - File Output Operator [FS_115] - table:{"name:":"default.a"} - Group By Operator [GBY_112] (rows=2941/319 width=178) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 8 [SIMPLE_EDGE] - <-Reducer 13 [CONTAINS] - Reduce Output Operator [RS_111] - PartitionCols:_col0, _col1 - Select Operator [SEL_107] (rows=192/304 width=175) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_164] (rows=192/304 width=175) - Conds:RS_104._col1=RS_105._col1(Inner),Output:["_col0","_col3"] - <-Reducer 12 [SIMPLE_EDGE] - SHUFFLE [RS_104] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_163] (rows=39/115 width=264) - Conds:RS_101._col0=RS_102._col0(Inner),Output:["_col0","_col1","_col3"] - <-Map 11 [SIMPLE_EDGE] - SHUFFLE [RS_101] - PartitionCols:_col0 - Select Operator [SEL_14] (rows=25/25 width=175) - Output:["_col0","_col1"] - Filter Operator [FIL_146] (rows=25/25 width=175) - predicate:(key is not null and value is not null) - TableScan [TS_12] (rows=25/25 
width=175) - default@src1,x,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Map 25 [SIMPLE_EDGE] - SHUFFLE [RS_102] - PartitionCols:_col0 - Select Operator [SEL_70] (rows=25/25 width=175) - Output:["_col0","_col1"] - Filter Operator [FIL_154] (rows=25/25 width=175) - predicate:key is not null - TableScan [TS_68] (rows=25/25 width=175) - default@src1,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Reducer 32 [SIMPLE_EDGE] - SHUFFLE [RS_105] - PartitionCols:_col1 - Select Operator [SEL_100] (rows=1525/319 width=178) - Output:["_col1"] - Group By Operator [GBY_99] (rows=1525/319 width=178) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 31 [SIMPLE_EDGE] - <-Map 35 [CONTAINS] - Reduce Output Operator [RS_98] - PartitionCols:_col1, _col0 - Select Operator [SEL_94] (rows=500/500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_158] (rows=500/500 width=178) - predicate:value is not null - TableScan [TS_92] (rows=500/500 width=178) - Output:["key","value"] - <-Reducer 30 [CONTAINS] - Reduce Output Operator [RS_98] - PartitionCols:_col1, _col0 - Select Operator [SEL_91] (rows=1025/319 width=178) - Output:["_col0","_col1"] - Group By Operator [GBY_90] (rows=1025/319 width=178) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 29 [SIMPLE_EDGE] - <-Map 34 [CONTAINS] - Reduce Output Operator [RS_89] - PartitionCols:_col1, _col0 - Select Operator [SEL_85] (rows=500/500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_157] (rows=500/500 width=178) - predicate:value is not null - TableScan [TS_83] (rows=500/500 width=178) - Output:["key","value"] - <-Reducer 28 [CONTAINS] - Reduce Output Operator [RS_89] - PartitionCols:_col1, _col0 - Select Operator [SEL_82] (rows=525/319 width=178) + Reducer 10 + File Output Operator [FS_5] + Group By Operator [GBY_3] (rows=1/1 width=880) + Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')"] + <-Reducer 9 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_2] + Select Operator [SEL_1] (rows=2941/319 width=178) + Output:["key","value"] + Group By Operator [GBY_112] (rows=2941/319 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 8 [SIMPLE_EDGE] + <-Reducer 16 [CONTAINS] + Reduce Output Operator [RS_111] + PartitionCols:_col0, _col1 + Select Operator [SEL_107] (rows=192/304 width=175) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_164] (rows=192/304 width=175) + Conds:RS_104._col1=RS_105._col1(Inner),Output:["_col0","_col3"] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_104] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_163] (rows=39/115 width=264) + Conds:RS_101._col0=RS_102._col0(Inner),Output:["_col0","_col1","_col3"] + <-Map 14 [SIMPLE_EDGE] + SHUFFLE [RS_101] + PartitionCols:_col0 + Select Operator [SEL_14] (rows=25/25 width=175) + Output:["_col0","_col1"] + Filter Operator [FIL_146] (rows=25/25 width=175) + predicate:(key is not null and value is not null) + TableScan [TS_12] (rows=25/25 width=175) + default@src1,x,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Map 28 [SIMPLE_EDGE] + SHUFFLE [RS_102] + PartitionCols:_col0 + Select Operator [SEL_70] (rows=25/25 width=175) + Output:["_col0","_col1"] + Filter Operator [FIL_154] (rows=25/25 width=175) + predicate:key is not null + TableScan [TS_68] (rows=25/25 width=175) + default@src1,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Reducer 35 [SIMPLE_EDGE] + SHUFFLE [RS_105] + PartitionCols:_col1 + Select Operator [SEL_100] (rows=1525/319 
width=178) + Output:["_col1"] + Group By Operator [GBY_99] (rows=1525/319 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 34 [SIMPLE_EDGE] + <-Map 38 [CONTAINS] + Reduce Output Operator [RS_98] + PartitionCols:_col1, _col0 + Select Operator [SEL_94] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_158] (rows=500/500 width=178) + predicate:value is not null + TableScan [TS_92] (rows=500/500 width=178) + Output:["key","value"] + <-Reducer 33 [CONTAINS] + Reduce Output Operator [RS_98] + PartitionCols:_col1, _col0 + Select Operator [SEL_91] (rows=1025/319 width=178) + Output:["_col0","_col1"] + Group By Operator [GBY_90] (rows=1025/319 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 32 [SIMPLE_EDGE] + <-Map 37 [CONTAINS] + Reduce Output Operator [RS_89] + PartitionCols:_col1, _col0 + Select Operator [SEL_85] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_157] (rows=500/500 width=178) + predicate:value is not null + TableScan [TS_83] (rows=500/500 width=178) + Output:["key","value"] + <-Reducer 31 [CONTAINS] + Reduce Output Operator [RS_89] + PartitionCols:_col1, _col0 + Select Operator [SEL_82] (rows=525/319 width=178) + Output:["_col0","_col1"] + Group By Operator [GBY_81] (rows=525/319 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 30 [SIMPLE_EDGE] + <-Map 29 [CONTAINS] + Reduce Output Operator [RS_80] + PartitionCols:_col1, _col0 + Select Operator [SEL_73] (rows=25/25 width=175) + Output:["_col0","_col1"] + Filter Operator [FIL_155] (rows=25/25 width=175) + predicate:value is not null + TableScan [TS_71] (rows=25/25 width=175) + Output:["key","value"] + <-Map 36 [CONTAINS] + Reduce Output Operator [RS_80] + PartitionCols:_col1, _col0 + Select Operator [SEL_76] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_156] (rows=500/500 width=178) + predicate:value is not null + TableScan [TS_74] (rows=500/500 width=178) + Output:["key","value"] + <-Reducer 7 [CONTAINS] + Reduce Output Operator [RS_111] + PartitionCols:_col0, _col1 + Group By Operator [GBY_63] (rows=2749/309 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 6 [SIMPLE_EDGE] + <-Reducer 19 [CONTAINS] + Reduce Output Operator [RS_62] + PartitionCols:_col0, _col1 + Select Operator [SEL_58] (rows=2682/1056 width=178) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_162] (rows=2682/1056 width=178) + Conds:RS_55._col2=RS_56._col0(Inner),Output:["_col2","_col5"] + <-Map 27 [SIMPLE_EDGE] + SHUFFLE [RS_56] + PartitionCols:_col0 + Select Operator [SEL_51] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_152] (rows=500/500 width=178) + predicate:key is not null + TableScan [TS_49] (rows=500/500 width=178) + default@src,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Reducer 18 [SIMPLE_EDGE] + SHUFFLE [RS_55] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_161] (rows=1658/512 width=87) + Conds:RS_52._col1=RS_53._col1(Inner),Output:["_col2"] + <-Map 17 [SIMPLE_EDGE] + SHUFFLE [RS_53] + PartitionCols:_col1 + Select Operator [SEL_48] (rows=500/500 width=178) Output:["_col0","_col1"] - Group By Operator [GBY_81] (rows=525/319 width=178) + Filter Operator [FIL_151] (rows=500/500 width=178) + predicate:(key is not null and value is not null) + TableScan [TS_15] (rows=500/500 width=178) + default@src,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Reducer 24 [SIMPLE_EDGE] + SHUFFLE [RS_52] + PartitionCols:_col1 + 
Select Operator [SEL_45] (rows=1025/319 width=178) + Output:["_col1"] + Group By Operator [GBY_44] (rows=1025/319 width=178) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 27 [SIMPLE_EDGE] + <-Union 23 [SIMPLE_EDGE] <-Map 26 [CONTAINS] - Reduce Output Operator [RS_80] + Reduce Output Operator [RS_43] PartitionCols:_col1, _col0 - Select Operator [SEL_73] (rows=25/25 width=175) + Select Operator [SEL_39] (rows=500/500 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_155] (rows=25/25 width=175) + Filter Operator [FIL_150] (rows=500/500 width=178) predicate:value is not null - TableScan [TS_71] (rows=25/25 width=175) + TableScan [TS_37] (rows=500/500 width=178) Output:["key","value"] - <-Map 33 [CONTAINS] - Reduce Output Operator [RS_80] + <-Reducer 22 [CONTAINS] + Reduce Output Operator [RS_43] PartitionCols:_col1, _col0 - Select Operator [SEL_76] (rows=500/500 width=178) + Select Operator [SEL_36] (rows=525/319 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_156] (rows=500/500 width=178) + Group By Operator [GBY_35] (rows=525/319 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 21 [SIMPLE_EDGE] + <-Map 20 [CONTAINS] + Reduce Output Operator [RS_34] + PartitionCols:_col1, _col0 + Select Operator [SEL_27] (rows=25/25 width=175) + Output:["_col0","_col1"] + Filter Operator [FIL_148] (rows=25/25 width=175) + predicate:value is not null + TableScan [TS_25] (rows=25/25 width=175) + Output:["key","value"] + <-Map 25 [CONTAINS] + Reduce Output Operator [RS_34] + PartitionCols:_col1, _col0 + Select Operator [SEL_30] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_149] (rows=500/500 width=178) + predicate:value is not null + TableScan [TS_28] (rows=500/500 width=178) + Output:["key","value"] + <-Reducer 5 [CONTAINS] + Reduce Output Operator [RS_62] + PartitionCols:_col0, _col1 + Select Operator [SEL_24] (rows=67/61 width=177) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_160] (rows=67/61 width=177) + Conds:RS_21._col2=RS_22._col0(Inner),Output:["_col2","_col5"] + <-Map 17 [SIMPLE_EDGE] + SHUFFLE [RS_22] + PartitionCols:_col0 + Select Operator [SEL_17] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_147] (rows=500/500 width=178) + predicate:key is not null + Please refer to the previous TableScan [TS_15] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_21] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_159] (rows=42/52 width=86) + Conds:RS_18._col1=RS_19._col1(Inner),Output:["_col2"] + <-Map 14 [SIMPLE_EDGE] + SHUFFLE [RS_19] + PartitionCols:_col1 + Please refer to the previous Select Operator [SEL_14] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_18] + PartitionCols:_col1 + Select Operator [SEL_11] (rows=525/319 width=178) + Output:["_col1"] + Group By Operator [GBY_10] (rows=525/319 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 2 [SIMPLE_EDGE] + <-Map 1 [CONTAINS] + Reduce Output Operator [RS_9] + PartitionCols:_col1, _col0 + Select Operator [SEL_2] (rows=25/25 width=175) + Output:["_col0","_col1"] + Filter Operator [FIL_144] (rows=25/25 width=175) predicate:value is not null - TableScan [TS_74] (rows=500/500 width=178) + TableScan [TS_0] (rows=25/25 width=175) Output:["key","value"] - <-Reducer 7 [CONTAINS] - Reduce Output Operator [RS_111] - PartitionCols:_col0, _col1 - Group By Operator [GBY_63] (rows=2749/309 width=178) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 6 [SIMPLE_EDGE] - <-Reducer 16 [CONTAINS] - Reduce Output 
Operator [RS_62] - PartitionCols:_col0, _col1 - Select Operator [SEL_58] (rows=2682/1056 width=178) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_162] (rows=2682/1056 width=178) - Conds:RS_55._col2=RS_56._col0(Inner),Output:["_col2","_col5"] - <-Map 24 [SIMPLE_EDGE] - SHUFFLE [RS_56] - PartitionCols:_col0 - Select Operator [SEL_51] (rows=500/500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_152] (rows=500/500 width=178) - predicate:key is not null - TableScan [TS_49] (rows=500/500 width=178) - default@src,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Reducer 15 [SIMPLE_EDGE] - SHUFFLE [RS_55] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_161] (rows=1658/512 width=87) - Conds:RS_52._col1=RS_53._col1(Inner),Output:["_col2"] - <-Map 14 [SIMPLE_EDGE] - SHUFFLE [RS_53] - PartitionCols:_col1 - Select Operator [SEL_48] (rows=500/500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_151] (rows=500/500 width=178) - predicate:(key is not null and value is not null) - TableScan [TS_15] (rows=500/500 width=178) - default@src,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Reducer 21 [SIMPLE_EDGE] - SHUFFLE [RS_52] - PartitionCols:_col1 - Select Operator [SEL_45] (rows=1025/319 width=178) - Output:["_col1"] - Group By Operator [GBY_44] (rows=1025/319 width=178) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 20 [SIMPLE_EDGE] - <-Map 23 [CONTAINS] - Reduce Output Operator [RS_43] - PartitionCols:_col1, _col0 - Select Operator [SEL_39] (rows=500/500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_150] (rows=500/500 width=178) - predicate:value is not null - TableScan [TS_37] (rows=500/500 width=178) - Output:["key","value"] - <-Reducer 19 [CONTAINS] - Reduce Output Operator [RS_43] - PartitionCols:_col1, _col0 - Select Operator [SEL_36] (rows=525/319 width=178) - Output:["_col0","_col1"] - Group By Operator [GBY_35] (rows=525/319 width=178) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 18 [SIMPLE_EDGE] - <-Map 17 [CONTAINS] - Reduce Output Operator [RS_34] - PartitionCols:_col1, _col0 - Select Operator [SEL_27] (rows=25/25 width=175) - Output:["_col0","_col1"] - Filter Operator [FIL_148] (rows=25/25 width=175) - predicate:value is not null - TableScan [TS_25] (rows=25/25 width=175) - Output:["key","value"] - <-Map 22 [CONTAINS] - Reduce Output Operator [RS_34] - PartitionCols:_col1, _col0 - Select Operator [SEL_30] (rows=500/500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_149] (rows=500/500 width=178) - predicate:value is not null - TableScan [TS_28] (rows=500/500 width=178) - Output:["key","value"] - <-Reducer 5 [CONTAINS] - Reduce Output Operator [RS_62] - PartitionCols:_col0, _col1 - Select Operator [SEL_24] (rows=67/61 width=177) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_160] (rows=67/61 width=177) - Conds:RS_21._col2=RS_22._col0(Inner),Output:["_col2","_col5"] - <-Map 14 [SIMPLE_EDGE] - SHUFFLE [RS_22] - PartitionCols:_col0 - Select Operator [SEL_17] (rows=500/500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_147] (rows=500/500 width=178) - predicate:key is not null - Please refer to the previous TableScan [TS_15] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_21] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_159] (rows=42/52 width=86) - Conds:RS_18._col1=RS_19._col1(Inner),Output:["_col2"] - <-Map 11 [SIMPLE_EDGE] - SHUFFLE [RS_19] - PartitionCols:_col1 - Please refer to the previous Select Operator [SEL_14] - <-Reducer 3 [SIMPLE_EDGE] - 
SHUFFLE [RS_18] - PartitionCols:_col1 - Select Operator [SEL_11] (rows=525/319 width=178) - Output:["_col1"] - Group By Operator [GBY_10] (rows=525/319 width=178) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 2 [SIMPLE_EDGE] - <-Map 1 [CONTAINS] - Reduce Output Operator [RS_9] - PartitionCols:_col1, _col0 - Select Operator [SEL_2] (rows=25/25 width=175) - Output:["_col0","_col1"] - Filter Operator [FIL_144] (rows=25/25 width=175) - predicate:value is not null - TableScan [TS_0] (rows=25/25 width=175) - Output:["key","value"] - <-Map 10 [CONTAINS] - Reduce Output Operator [RS_9] - PartitionCols:_col1, _col0 - Select Operator [SEL_5] (rows=500/500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_145] (rows=500/500 width=178) - predicate:value is not null - TableScan [TS_3] (rows=500/500 width=178) - Output:["key","value"] - File Output Operator [FS_117] - table:{"name:":"default.b"} - Please refer to the previous Group By Operator [GBY_112] - File Output Operator [FS_119] - table:{"name:":"default.c"} - Please refer to the previous Group By Operator [GBY_112] + <-Map 13 [CONTAINS] + Reduce Output Operator [RS_9] + PartitionCols:_col1, _col0 + Select Operator [SEL_5] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_145] (rows=500/500 width=178) + predicate:value is not null + TableScan [TS_3] (rows=500/500 width=178) + Output:["key","value"] + Reducer 11 + File Output Operator [FS_5] + Group By Operator [GBY_3] (rows=1/1 width=880) + Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')"] + <-Reducer 9 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_2] + Select Operator [SEL_1] (rows=2941/319 width=178) + Output:["key","value"] + Please refer to the previous Group By Operator [GBY_112] + Reducer 12 + File Output Operator [FS_5] + Group By Operator [GBY_3] (rows=1/1 width=880) + Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')"] + <-Reducer 9 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_2] + Select Operator [SEL_1] (rows=2941/319 width=178) + Output:["key","value"] + Please refer to the previous Group By Operator [GBY_112] Stage-6 Stats Work{} Stage-1 @@ -1738,10 +1805,12 @@ POSTHOOK: type: QUERY Plan optimized by CBO. 
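The renumbered Reducer 10/11/12 vertices above are the stats branches that column-stats autogather adds: one compute_stats aggregation per INSERT target (default.a, default.b, default.c), each fed from Reducer 9 over a CUSTOM_SIMPLE_EDGE. A minimal sketch of the behavior difference, assuming the same src/a tables used in these tests — with autogather off, the manual equivalent of each branch is an explicit ANALYZE after the insert:

```sql
-- With autogather off, column stats require a separate statement:
SET hive.stats.column.autogather=false;
INSERT OVERWRITE TABLE a SELECT key, value FROM src;
ANALYZE TABLE a COMPUTE STATISTICS FOR COLUMNS;

-- With autogather on (the behavior exercised by this patch), the INSERT
-- itself plans the compute_stats branch, so no ANALYZE is needed:
SET hive.stats.column.autogather=true;
INSERT OVERWRITE TABLE a SELECT key, value FROM src;
```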
Vertex dependency in root stage -Map 6 <- Union 3 (CONTAINS) +Map 8 <- Union 3 (CONTAINS) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) Reducer 4 <- Union 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) +Reducer 7 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) Stage-4 Stats Work{} @@ -1751,40 +1820,54 @@ Stage-4 Stage-3 Dependency Collection{} Stage-2 - Reducer 5 - File Output Operator [FS_18] - table:{"name:":"default.dest1"} - Select Operator [SEL_16] (rows=309/310 width=272) - Output:["_col0","_col1"] - Group By Operator [GBY_15] (rows=309/310 width=96) - Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_14] - PartitionCols:_col0 - Group By Operator [GBY_11] (rows=501/310 width=272) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 3 [SIMPLE_EDGE] - <-Map 6 [CONTAINS] - Reduce Output Operator [RS_10] - PartitionCols:_col0, _col1 - Select Operator [SEL_6] (rows=500/500 width=178) - Output:["_col0","_col1"] - TableScan [TS_5] (rows=500/500 width=178) - Output:["key","value"] - <-Reducer 2 [CONTAINS] - Reduce Output Operator [RS_10] - PartitionCols:_col0, _col1 - Select Operator [SEL_4] (rows=1/1 width=272) - Output:["_col0","_col1"] - Group By Operator [GBY_3] (rows=1/1 width=8) - Output:["_col0"],aggregations:["count()"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_2] - Select Operator [SEL_1] (rows=500/500 width=10) - TableScan [TS_0] (rows=500/500 width=10) - default@src,s1,Tbl:COMPLETE,Col:COMPLETE - File Output Operator [FS_24] - table:{"name:":"default.dest2"} + Reducer 6 + File Output Operator [FS_5] + Group By Operator [GBY_3] (rows=1/2 width=880) + Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')"] + <-Reducer 5 [CUSTOM_SIMPLE_EDGE] + File Output Operator [FS_18] + table:{"name:":"default.dest1"} + Select Operator [SEL_16] (rows=309/310 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_15] (rows=309/310 width=96) + Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 + <-Reducer 4 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_14] + PartitionCols:_col0 + Group By Operator [GBY_11] (rows=501/310 width=272) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 3 [SIMPLE_EDGE] + <-Map 8 [CONTAINS] + Reduce Output Operator [RS_10] + PartitionCols:_col0, _col1 + Select Operator [SEL_6] (rows=500/500 width=178) + Output:["_col0","_col1"] + TableScan [TS_5] (rows=500/500 width=178) + Output:["key","value"] + <-Reducer 2 [CONTAINS] + Reduce Output Operator [RS_10] + PartitionCols:_col0, _col1 + Select Operator [SEL_4] (rows=1/1 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_3] (rows=1/2 width=8) + Output:["_col0"],aggregations:["count()"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_2] + Select Operator [SEL_1] (rows=500/310 width=10) + TableScan [TS_0] (rows=500/500 width=10) + default@src,s1,Tbl:COMPLETE,Col:COMPLETE + PARTITION_ONLY_SHUFFLE [RS_2] + Select Operator [SEL_1] (rows=309/310 width=272) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_16] + Reducer 7 + File Output Operator [FS_5] + Group By Operator [GBY_3] (rows=1/2 width=1320) + Output:["_col0","_col1","_col2"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')","compute_stats(VALUE._col3, 'hll')"] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE 
[RS_2] + Select Operator [SEL_1] (rows=501/310 width=456) + Output:["key","val1","val2"] Select Operator [SEL_22] (rows=501/310 width=456) Output:["_col0","_col1","_col2"] Group By Operator [GBY_21] (rows=501/310 width=280) @@ -1969,11 +2052,13 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Map 6 <- Union 3 (CONTAINS) -Map 7 <- Union 3 (CONTAINS) +Map 8 <- Union 3 (CONTAINS) +Map 9 <- Union 3 (CONTAINS) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) Reducer 4 <- Union 3 (SIMPLE_EDGE) -Reducer 5 <- Union 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) +Reducer 6 <- Union 3 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) Stage-4 Stats Work{} @@ -1983,57 +2068,73 @@ Stage-4 Stage-3 Dependency Collection{} Stage-2 - Reducer 4 - File Output Operator [FS_16] - table:{"name:":"default.dest1"} - Select Operator [SEL_14] (rows=309/310 width=272) - Output:["_col0","_col1"] - Group By Operator [GBY_13] (rows=309/310 width=96) - Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 - <-Union 3 [SIMPLE_EDGE] - <-Map 6 [CONTAINS] - Reduce Output Operator [RS_12] - PartitionCols:_col0 - Select Operator [SEL_6] (rows=500/500 width=178) - Output:["_col0","_col1"] - TableScan [TS_5] (rows=500/500 width=178) - Output:["key","value"] - Reduce Output Operator [RS_18] - PartitionCols:_col0, _col1 - Please refer to the previous Select Operator [SEL_6] - <-Map 7 [CONTAINS] - Reduce Output Operator [RS_12] - PartitionCols:_col0 - Select Operator [SEL_9] (rows=500/500 width=178) - Output:["_col0","_col1"] - TableScan [TS_8] (rows=500/500 width=178) - Output:["key","value"] - Reduce Output Operator [RS_18] - PartitionCols:_col0, _col1 - Please refer to the previous Select Operator [SEL_9] - <-Reducer 2 [CONTAINS] - Reduce Output Operator [RS_12] - PartitionCols:_col0 - Select Operator [SEL_4] (rows=1/1 width=272) - Output:["_col0","_col1"] - Group By Operator [GBY_3] (rows=1/1 width=8) - Output:["_col0"],aggregations:["count()"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_2] - Select Operator [SEL_1] (rows=500/500 width=10) - TableScan [TS_0] (rows=500/500 width=10) - default@src,s1,Tbl:COMPLETE,Col:COMPLETE - Reduce Output Operator [RS_18] - PartitionCols:_col0, _col1 - Please refer to the previous Select Operator [SEL_4] Reducer 5 - File Output Operator [FS_22] - table:{"name:":"default.dest2"} - Select Operator [SEL_20] (rows=1001/310 width=456) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_19] (rows=1001/310 width=280) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT KEY._col2:0._col0)"],keys:KEY._col0, KEY._col1 - <- Please refer to the previous Union 3 [SIMPLE_EDGE] + File Output Operator [FS_5] + Group By Operator [GBY_3] (rows=1/2 width=880) + Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')"] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] + File Output Operator [FS_16] + table:{"name:":"default.dest1"} + Select Operator [SEL_14] (rows=309/310 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_13] (rows=309/310 width=96) + Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 + <-Union 3 [SIMPLE_EDGE] + <-Map 8 [CONTAINS] + Reduce Output Operator [RS_12] + PartitionCols:_col0 + Select Operator [SEL_6] (rows=500/500 width=178) + Output:["_col0","_col1"] + TableScan [TS_5] (rows=500/500 width=178) + Output:["key","value"] + Reduce Output Operator [RS_18] + 
PartitionCols:_col0, _col1 + Please refer to the previous Select Operator [SEL_6] + <-Map 9 [CONTAINS] + Reduce Output Operator [RS_12] + PartitionCols:_col0 + Select Operator [SEL_9] (rows=500/500 width=178) + Output:["_col0","_col1"] + TableScan [TS_8] (rows=500/500 width=178) + Output:["key","value"] + Reduce Output Operator [RS_18] + PartitionCols:_col0, _col1 + Please refer to the previous Select Operator [SEL_9] + <-Reducer 2 [CONTAINS] + Reduce Output Operator [RS_12] + PartitionCols:_col0 + Select Operator [SEL_4] (rows=1/1 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_3] (rows=1/2 width=8) + Output:["_col0"],aggregations:["count()"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_2] + Select Operator [SEL_1] (rows=500/310 width=10) + TableScan [TS_0] (rows=500/500 width=10) + default@src,s1,Tbl:COMPLETE,Col:COMPLETE + Reduce Output Operator [RS_18] + PartitionCols:_col0, _col1 + Please refer to the previous Select Operator [SEL_4] + PARTITION_ONLY_SHUFFLE [RS_2] + Select Operator [SEL_1] (rows=309/310 width=272) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_14] + Reducer 7 + File Output Operator [FS_5] + Group By Operator [GBY_3] (rows=1/2 width=1320) + Output:["_col0","_col1","_col2"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')","compute_stats(VALUE._col3, 'hll')"] + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] + File Output Operator [FS_22] + table:{"name:":"default.dest2"} + Select Operator [SEL_20] (rows=1001/310 width=456) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_19] (rows=1001/310 width=280) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT KEY._col2:0._col0)"],keys:KEY._col0, KEY._col1 + <- Please refer to the previous Union 3 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_2] + Select Operator [SEL_1] (rows=1001/310 width=456) + Output:["key","val1","val2"] + Please refer to the previous Select Operator [SEL_20] Stage-5 Stats Work{} Stage-1 @@ -2088,10 +2189,12 @@ POSTHOOK: type: QUERY Plan optimized by CBO. 
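In the compute_stats calls above, the 'hll' literal selects the HyperLogLog estimator for the distinct-value count. One way to confirm the autogathered stats actually landed — a sketch, assuming the dest1 table from this test:

```sql
-- Column-level stats (min/max/NDV/null count) for a single column:
DESCRIBE FORMATTED dest1 key;

-- Table-level summary; COLUMN_STATS_ACCURATE in the parameters should now
-- list the covered columns, as seen in truncate_column.q.out below:
DESCRIBE FORMATTED dest1;
```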
Vertex dependency in root stage -Map 6 <- Union 3 (CONTAINS) +Map 8 <- Union 3 (CONTAINS) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) Reducer 4 <- Union 3 (SIMPLE_EDGE) -Reducer 5 <- Union 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) +Reducer 6 <- Union 3 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) Stage-4 Stats Work{} @@ -2101,47 +2204,63 @@ Stage-4 Stage-3 Dependency Collection{} Stage-2 - Reducer 4 - File Output Operator [FS_14] - table:{"name:":"default.dest1"} - Select Operator [SEL_12] (rows=309/310 width=272) - Output:["_col0","_col1"] - Group By Operator [GBY_11] (rows=309/310 width=96) - Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 - <-Union 3 [SIMPLE_EDGE] - <-Map 6 [CONTAINS] - Reduce Output Operator [RS_10] - PartitionCols:_col0 - Select Operator [SEL_6] (rows=500/500 width=178) - Output:["_col0","_col1"] - TableScan [TS_5] (rows=500/500 width=178) - Output:["key","value"] - Reduce Output Operator [RS_16] - PartitionCols:_col0, _col1 - Please refer to the previous Select Operator [SEL_6] - <-Reducer 2 [CONTAINS] - Reduce Output Operator [RS_10] - PartitionCols:_col0 - Select Operator [SEL_4] (rows=1/1 width=272) - Output:["_col0","_col1"] - Group By Operator [GBY_3] (rows=1/1 width=8) - Output:["_col0"],aggregations:["count()"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_2] - Select Operator [SEL_1] (rows=500/500 width=10) - TableScan [TS_0] (rows=500/500 width=10) - default@src,s1,Tbl:COMPLETE,Col:COMPLETE - Reduce Output Operator [RS_16] - PartitionCols:_col0, _col1 - Please refer to the previous Select Operator [SEL_4] Reducer 5 - File Output Operator [FS_20] - table:{"name:":"default.dest2"} - Select Operator [SEL_18] (rows=501/310 width=456) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_17] (rows=501/310 width=280) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT KEY._col2:0._col0)"],keys:KEY._col0, KEY._col1 - <- Please refer to the previous Union 3 [SIMPLE_EDGE] + File Output Operator [FS_5] + Group By Operator [GBY_3] (rows=1/2 width=880) + Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')"] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] + File Output Operator [FS_14] + table:{"name:":"default.dest1"} + Select Operator [SEL_12] (rows=309/310 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_11] (rows=309/310 width=96) + Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 + <-Union 3 [SIMPLE_EDGE] + <-Map 8 [CONTAINS] + Reduce Output Operator [RS_10] + PartitionCols:_col0 + Select Operator [SEL_6] (rows=500/500 width=178) + Output:["_col0","_col1"] + TableScan [TS_5] (rows=500/500 width=178) + Output:["key","value"] + Reduce Output Operator [RS_16] + PartitionCols:_col0, _col1 + Please refer to the previous Select Operator [SEL_6] + <-Reducer 2 [CONTAINS] + Reduce Output Operator [RS_10] + PartitionCols:_col0 + Select Operator [SEL_4] (rows=1/1 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_3] (rows=1/2 width=8) + Output:["_col0"],aggregations:["count()"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_2] + Select Operator [SEL_1] (rows=500/310 width=10) + TableScan [TS_0] (rows=500/500 width=10) + default@src,s1,Tbl:COMPLETE,Col:COMPLETE + Reduce Output Operator [RS_16] + PartitionCols:_col0, _col1 + Please refer to the previous Select Operator [SEL_4] + PARTITION_ONLY_SHUFFLE [RS_2] + Select Operator 
[SEL_1] (rows=309/310 width=272) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_12] + Reducer 7 + File Output Operator [FS_5] + Group By Operator [GBY_3] (rows=1/2 width=1320) + Output:["_col0","_col1","_col2"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')","compute_stats(VALUE._col3, 'hll')"] + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] + File Output Operator [FS_20] + table:{"name:":"default.dest2"} + Select Operator [SEL_18] (rows=501/310 width=456) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_17] (rows=501/310 width=280) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT KEY._col2:0._col0)"],keys:KEY._col0, KEY._col1 + <- Please refer to the previous Union 3 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_2] + Select Operator [SEL_1] (rows=501/310 width=456) + Output:["key","val1","val2"] + Please refer to the previous Select Operator [SEL_18] Stage-5 Stats Work{} Stage-1 diff --git ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out index f718a8c722..6f483cb4d3 100644 --- ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out +++ ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out @@ -650,6 +650,9 @@ POSTHOOK: query: explain analyze insert overwrite table orc_merge5 select userid POSTHOOK: type: QUERY Plan optimized by CBO. +Vertex dependency in root stage +Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Stage-3 Stats Work{} Stage-0 @@ -662,15 +665,23 @@ Stage-3 Stage-8(CONDITIONAL CHILD TASKS: Stage-5, Stage-4, Stage-6) Conditional Operator Stage-1 - Map 1 - File Output Operator [FS_3] - table:{"name:":"default.orc_merge5"} - Select Operator [SEL_2] (rows=1/3 width=352) - Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_4] (rows=1/3 width=352) - predicate:(userid <= 13) - TableScan [TS_0] (rows=1/15000 width=352) - default@orc_merge5,orc_merge5,Tbl:COMPLETE,Col:NONE,Output:["userid","string1","subtype","decimal1","ts"] + Reducer 2 + File Output Operator [FS_5] + Group By Operator [GBY_3] (rows=1/1 width=2760) + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')","compute_stats(VALUE._col3, 'hll')","compute_stats(VALUE._col4, 'hll')","compute_stats(VALUE._col5, 'hll')"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] + File Output Operator [FS_3] + table:{"name:":"default.orc_merge5"} + Select Operator [SEL_2] (rows=1/3 width=352) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_4] (rows=1/3 width=352) + predicate:(userid <= 13) + TableScan [TS_0] (rows=1/15000 width=352) + default@orc_merge5,orc_merge5,Tbl:COMPLETE,Col:NONE,Output:["userid","string1","subtype","decimal1","ts"] + PARTITION_ONLY_SHUFFLE [RS_2] + Select Operator [SEL_1] (rows=1/3 width=352) + Output:["userid","string1","subtype","decimal1","ts"] + Please refer to the previous Select Operator [SEL_2] Stage-4(CONDITIONAL) File Merge Please refer to the previous Stage-8(CONDITIONAL CHILD TASKS: Stage-5, Stage-4, Stage-6) @@ -825,34 +836,31 @@ POSTHOOK: type: QUERY Plan optimized by CBO. 
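In this EXPLAIN ANALYZE output the rows=m/n annotations read as estimated/actual row counts — e.g. rows=1/3 on SEL_2 means the optimizer estimated one row where three were produced at run time, and rows=1/1 on the new GBY_3 reflects the single merged stats row. The statement under test, reconstructed from the truncated POSTHOOK line and the plan's column list (source table and predicate taken from the TableScan above, so treat this as a sketch rather than the verbatim .q file):

```sql
EXPLAIN ANALYZE
INSERT OVERWRITE TABLE orc_merge5
SELECT userid, string1, subtype, decimal1, ts
FROM orc_merge5
WHERE userid <= 13;
```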
Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +Map 2 <- Map 1 (CUSTOM_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 2 + Map 2 File Output Operator [FS_10] - Select Operator [SEL_9] (rows=522/480 width=189) + Select Operator [SEL_9] (rows=391/480 width=186) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_25] (rows=522/480 width=189) - Conds:RS_6._col0=RS_7._col0(Inner),Output:["_col0","_col1","_col3"] - <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_6] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=230/242 width=190) - Output:["_col0","_col1"] - Filter Operator [FIL_13] (rows=230/242 width=190) - predicate:key is not null - TableScan [TS_0] (rows=242/242 width=190) - default@tab,a,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Map 3 [SIMPLE_EDGE] - SHUFFLE [RS_7] + Map Join Operator [MAPJOIN_25] (rows=391/480 width=186) + BucketMapJoin:true,Conds:RS_6._col0=SEL_5._col0(Inner),HybridGraceHashJoin:true,Output:["_col0","_col1","_col3"] + <-Map 1 [CUSTOM_EDGE] + MULTICAST [RS_6] PartitionCols:_col0 - Select Operator [SEL_5] (rows=475/500 width=189) + Select Operator [SEL_2] (rows=242/242 width=95) Output:["_col0","_col1"] - Filter Operator [FIL_14] (rows=475/500 width=189) + Filter Operator [FIL_13] (rows=242/242 width=95) predicate:key is not null - TableScan [TS_3] (rows=500/500 width=189) - default@tab_part,b,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + TableScan [TS_0] (rows=242/242 width=95) + default@tab,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Select Operator [SEL_5] (rows=500/2000 width=95) + Output:["_col0","_col1"] + Filter Operator [FIL_14] (rows=500/2000 width=95) + predicate:key is not null + TableScan [TS_3] (rows=500/2000 width=95) + default@tab_part,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] diff --git ql/src/test/results/clientpositive/tez/explainuser_3.q.out ql/src/test/results/clientpositive/tez/explainuser_3.q.out index fa73dc603e..8b1ee28e3b 100644 --- ql/src/test/results/clientpositive/tez/explainuser_3.q.out +++ ql/src/test/results/clientpositive/tez/explainuser_3.q.out @@ -494,6 +494,9 @@ POSTHOOK: query: explain insert overwrite table orc_merge5 select userid,string1 POSTHOOK: type: QUERY Plan optimized by CBO. 
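The plan rewrite just above — Reducer 2 with two SIMPLE_EDGE inputs becoming Map 2 <- Map 1 (CUSTOM_EDGE) with BucketMapJoin:true — is a knock-on effect of having COMPLETE column stats: the smaller estimated data sizes let the optimizer convert the shuffled merge join into a bucketed map join with a MULTICAST broadcast of the small side. A hedged sketch of the join and the standard settings that gate this conversion (the exact .q setup may differ):

```sql
SET hive.auto.convert.join=true;
SET hive.convert.join.bucket.mapjoin.tez=true;

-- tab and tab_part are the bucketed tables scanned in the plan above.
EXPLAIN
SELECT a.key, a.value, b.value
FROM tab a JOIN tab_part b ON a.key = b.key;
```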
+Vertex dependency in root stage +Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Stage-3 Stats Work{} Stage-0 @@ -506,15 +509,25 @@ Stage-3 Stage-8(CONDITIONAL CHILD TASKS: Stage-5, Stage-4, Stage-6) Conditional Operator Stage-1 - Map 1 vectorized - File Output Operator [FS_8] - table:{"name:":"default.orc_merge5"} - Select Operator [SEL_7] (rows=1 width=352) - Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_6] (rows=1 width=352) - predicate:(userid <= 13) - TableScan [TS_0] (rows=1 width=352) - default@orc_merge5,orc_merge5,Tbl:COMPLETE,Col:NONE,Output:["userid","string1","subtype","decimal1","ts"] + Reducer 2 + File Output Operator [FS_6] + Group By Operator [GBY_4] (rows=1 width=2760) + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)","compute_stats(VALUE._col2)","compute_stats(VALUE._col3)","compute_stats(VALUE._col4)"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] + File Output Operator [FS_3] + table:{"name:":"default.orc_merge5"} + Select Operator [SEL_2] (rows=1 width=352) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_4] (rows=1 width=352) + predicate:(userid <= 13) + TableScan [TS_0] (rows=1 width=352) + default@orc_merge5,orc_merge5,Tbl:COMPLETE,Col:NONE,Output:["userid","string1","subtype","decimal1","ts"] + PARTITION_ONLY_SHUFFLE [RS_3] + Group By Operator [GBY_2] (rows=1 width=2696) + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["compute_stats(userid, 'hll')","compute_stats(string1, 'hll')","compute_stats(subtype, 'hll')","compute_stats(decimal1, 'hll')","compute_stats(ts, 'hll')"] + Select Operator [SEL_1] (rows=1 width=352) + Output:["userid","string1","subtype","decimal1","ts"] + Please refer to the previous Select Operator [SEL_2] Stage-4(CONDITIONAL) File Merge Please refer to the previous Stage-8(CONDITIONAL CHILD TASKS: Stage-5, Stage-4, Stage-6) @@ -653,34 +666,31 @@ POSTHOOK: type: QUERY Plan optimized by CBO. 
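The stats branch above shows the usual two-phase aggregation shape: a map-side GBY_2 in hash mode computes partial compute_stats(col, 'hll') sketches, a PARTITION_ONLY_SHUFFLE carries them over the CUSTOM_SIMPLE_EDGE, and the single-reducer GBY_4 merges the partials into the final per-column stats (the "mergepartial" mode visible in the MapReduce plans later in this patch). compute_stats is a registered UDAF, so the same struct the planner aggregates can be inspected by hand — a sketch, invoking it with the 'hll' argument exactly as the planner does here:

```sql
-- Returns one ndv/min/max/null-count struct per column, the same values
-- summarized in the "value expressions" of the stats branch above:
SELECT compute_stats(userid, 'hll'),
       compute_stats(string1, 'hll')
FROM orc_merge5;
```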
Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +Map 2 <- Map 1 (CUSTOM_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 2 - File Output Operator [FS_10] - Select Operator [SEL_9] (rows=522 width=189) + Map 2 vectorized + File Output Operator [FS_34] + Select Operator [SEL_33] (rows=391 width=186) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_25] (rows=522 width=189) - Conds:RS_28._col0=RS_31._col0(Inner),Output:["_col0","_col1","_col3"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_28] + Map Join Operator [MAPJOIN_32] (rows=391 width=186) + BucketMapJoin:true,Conds:RS_29._col0=SEL_31._col0(Inner),HybridGraceHashJoin:true,Output:["_col0","_col1","_col3"] + <-Map 1 [CUSTOM_EDGE] vectorized + MULTICAST [RS_29] PartitionCols:_col0 - Select Operator [SEL_27] (rows=230 width=190) + Select Operator [SEL_28] (rows=242 width=95) Output:["_col0","_col1"] - Filter Operator [FIL_26] (rows=230 width=190) + Filter Operator [FIL_27] (rows=242 width=95) predicate:key is not null - TableScan [TS_0] (rows=242 width=190) - default@tab,a,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Map 3 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_31] - PartitionCols:_col0 - Select Operator [SEL_30] (rows=475 width=189) - Output:["_col0","_col1"] - Filter Operator [FIL_29] (rows=475 width=189) - predicate:key is not null - TableScan [TS_3] (rows=500 width=189) - default@tab_part,b,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + TableScan [TS_0] (rows=242 width=95) + default@tab,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Select Operator [SEL_31] (rows=500 width=95) + Output:["_col0","_col1"] + Filter Operator [FIL_30] (rows=500 width=95) + predicate:key is not null + TableScan [TS_3] (rows=500 width=95) + default@tab_part,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] diff --git ql/src/test/results/clientpositive/tez/multi_count_distinct.q.out ql/src/test/results/clientpositive/tez/multi_count_distinct.q.out index 320585875d..7f7d354b6c 100644 --- ql/src/test/results/clientpositive/tez/multi_count_distinct.q.out +++ ql/src/test/results/clientpositive/tez/multi_count_distinct.q.out @@ -35,25 +35,25 @@ Stage-0 Stage-1 Reducer 3 File Output Operator [FS_12] - Group By Operator [GBY_10] (rows=1 width=126) + Group By Operator [GBY_10] (rows=1 width=24) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"] <-Reducer 2 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_9] - Group By Operator [GBY_8] (rows=1 width=126) + Group By Operator [GBY_8] (rows=1 width=24) Output:["_col0","_col1","_col2"],aggregations:["count(_col0)","count(_col1)","count(_col2)"] - Select Operator [SEL_6] (rows=13 width=102) + Select Operator [SEL_6] (rows=13 width=97) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_5] (rows=13 width=102) + Group By Operator [GBY_5] (rows=13 width=97) Output:["_col0","_col1","_col2","_col3"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3 <-Map 1 [SIMPLE_EDGE] SHUFFLE [RS_4] PartitionCols:_col0, _col1, _col2, _col3 - Group By Operator [GBY_3] (rows=27 width=102) + Group By Operator [GBY_3] (rows=13 width=97) Output:["_col0","_col1","_col2","_col3"],keys:_col0, _col1, _col2, 0 - Select Operator [SEL_1] (rows=9 width=102) + Select Operator [SEL_1] (rows=9 width=93) Output:["_col0","_col1","_col2"] - TableScan [TS_0] (rows=9 width=102) - default@employee,employee,Tbl:COMPLETE,Col:NONE,Output:["department_id","gender","education_level"] + TableScan [TS_0] (rows=9 width=93) + 
default@employee,employee,Tbl:COMPLETE,Col:COMPLETE,Output:["department_id","gender","education_level"] PREHOOK: query: select count(distinct department_id), count(distinct gender), count(distinct education_level) from employee PREHOOK: type: QUERY @@ -113,27 +113,27 @@ Stage-0 Stage-1 Reducer 3 File Output Operator [FS_12] - Select Operator [SEL_11] (rows=1 width=142) + Select Operator [SEL_11] (rows=1 width=56) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Group By Operator [GBY_10] (rows=1 width=142) + Group By Operator [GBY_10] (rows=1 width=40) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["count(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)","count(VALUE._col3)","count(VALUE._col4)"] <-Reducer 2 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_9] - Group By Operator [GBY_8] (rows=1 width=142) + Group By Operator [GBY_8] (rows=1 width=40) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["count(_col0)","count(_col1)","count(_col2)","count(_col3)","count(_col4)"] - Select Operator [SEL_6] (rows=22 width=102) + Select Operator [SEL_6] (rows=22 width=97) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_5] (rows=22 width=102) + Group By Operator [GBY_5] (rows=22 width=97) Output:["_col0","_col1","_col2","_col3"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3 <-Map 1 [SIMPLE_EDGE] SHUFFLE [RS_4] PartitionCols:_col0, _col1, _col2, _col3 - Group By Operator [GBY_3] (rows=45 width=102) + Group By Operator [GBY_3] (rows=22 width=97) Output:["_col0","_col1","_col2","_col3"],keys:_col0, _col1, _col2, 0 - Select Operator [SEL_1] (rows=9 width=102) + Select Operator [SEL_1] (rows=9 width=93) Output:["_col0","_col1","_col2"] - TableScan [TS_0] (rows=9 width=102) - default@employee,employee,Tbl:COMPLETE,Col:NONE,Output:["gender","department_id","education_level"] + TableScan [TS_0] (rows=9 width=93) + default@employee,employee,Tbl:COMPLETE,Col:COMPLETE,Output:["gender","department_id","education_level"] PREHOOK: query: select count(distinct gender), count(distinct department_id), count(distinct gender), count(distinct education_level), count(distinct education_level, department_id), count(distinct department_id, education_level), count(distinct department_id, education_level, gender) from employee diff --git ql/src/test/results/clientpositive/tez/tez-tag.q.out ql/src/test/results/clientpositive/tez/tez-tag.q.out index 05e15d355d..b4e00a6332 100644 --- ql/src/test/results/clientpositive/tez/tez-tag.q.out +++ ql/src/test/results/clientpositive/tez/tez-tag.q.out @@ -190,31 +190,31 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_17] Group By Operator [GBY_16] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Merge Join Operator [MERGEJOIN_30] (rows=522 width=3) + Merge Join Operator [MERGEJOIN_30] (rows=63 width=8) Conds:RS_12._col0=RS_13._col0(Inner) <-Map 6 [SIMPLE_EDGE] SHUFFLE [RS_13] PartitionCols:_col0 - Select Operator [SEL_8] (rows=475 width=3) + Select Operator [SEL_8] (rows=500 width=4) Output:["_col0"] - Filter Operator [FIL_28] (rows=475 width=3) + Filter Operator [FIL_28] (rows=500 width=4) predicate:key is not null - TableScan [TS_6] (rows=500 width=3) - default@tab_part,b,Tbl:COMPLETE,Col:NONE,Output:["key"] + TableScan [TS_6] (rows=500 width=4) + default@tab_part,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_12] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_29] (rows=239 width=180) + Merge Join Operator [MERGEJOIN_29] (rows=39 width=4) 
Conds:RS_9._col1=RS_10._col0(Inner),Output:["_col0"] <-Map 1 [SIMPLE_EDGE] SHUFFLE [RS_9] PartitionCols:_col1 - Select Operator [SEL_2] (rows=218 width=179) + Select Operator [SEL_2] (rows=242 width=95) Output:["_col0","_col1"] - Filter Operator [FIL_26] (rows=218 width=179) + Filter Operator [FIL_26] (rows=242 width=95) predicate:(key is not null and value is not null) - TableScan [TS_0] (rows=242 width=179) - default@tab,a,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + TableScan [TS_0] (rows=242 width=95) + default@tab,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] <-Map 5 [SIMPLE_EDGE] SHUFFLE [RS_10] PartitionCols:_col0 diff --git ql/src/test/results/clientpositive/tez/vector_join_part_col_char.q.out ql/src/test/results/clientpositive/tez/vector_join_part_col_char.q.out index 369bc62d62..4842f7dd7f 100644 --- ql/src/test/results/clientpositive/tez/vector_join_part_col_char.q.out +++ ql/src/test/results/clientpositive/tez/vector_join_part_col_char.q.out @@ -110,28 +110,28 @@ Stage-0 Stage-1 Reducer 2 File Output Operator [FS_10] - Merge Join Operator [MERGEJOIN_21] (rows=2 width=599) + Merge Join Operator [MERGEJOIN_21] (rows=2 width=429) Conds:RS_23._col2=RS_28._col2(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_23] PartitionCols:_col2 - Select Operator [SEL_22] (rows=2 width=322) + Select Operator [SEL_22] (rows=2 width=237) Output:["_col0","_col1","_col2"] - TableScan [TS_0] (rows=2 width=424) - default@char_tbl1,c1,Tbl:COMPLETE,Col:PARTIAL,Output:["name","age"] - Dynamic Partitioning Event Operator [EVENT_26] (rows=1 width=322) - Group By Operator [GBY_25] (rows=1 width=322) + TableScan [TS_0] (rows=2 width=237) + default@char_tbl1,c1,Tbl:COMPLETE,Col:COMPLETE,Output:["name","age"] + Dynamic Partitioning Event Operator [EVENT_26] (rows=1 width=237) + Group By Operator [GBY_25] (rows=1 width=237) Output:["_col0"],keys:_col0 - Select Operator [SEL_24] (rows=2 width=322) + Select Operator [SEL_24] (rows=2 width=237) Output:["_col0"] Please refer to the previous Select Operator [SEL_22] <-Map 3 [SIMPLE_EDGE] vectorized SHUFFLE [RS_28] PartitionCols:_col2 - Select Operator [SEL_27] (rows=2 width=277) + Select Operator [SEL_27] (rows=2 width=192) Output:["_col0","_col1","_col2"] - TableScan [TS_3] (rows=2 width=378) - default@char_tbl2,c2,Tbl:COMPLETE,Col:PARTIAL,Output:["name","age"] + TableScan [TS_3] (rows=2 width=192) + default@char_tbl2,c2,Tbl:COMPLETE,Col:COMPLETE,Output:["name","age"] PREHOOK: query: select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa) PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/tez/vector_non_string_partition.q.out ql/src/test/results/clientpositive/tez/vector_non_string_partition.q.out index 80e73d27bd..a25114f8d9 100644 --- ql/src/test/results/clientpositive/tez/vector_non_string_partition.q.out +++ ql/src/test/results/clientpositive/tez/vector_non_string_partition.q.out @@ -51,7 +51,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: non_string_part - Statistics: Num rows: 3073 Data size: 363126 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 3073 Data size: 24584 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Filter Operator @@ -60,7 +60,7 @@ STAGE PLANS: native: true predicateExpression: FilterLongColGreaterLongScalar(col 0:int, val 0) predicate: (cint > 0) (type: boolean) - Statistics: Num rows: 3073 Data size: 23976 Basic stats: COMPLETE Column stats: 
PARTIAL + Statistics: Num rows: 1024 Data size: 8192 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cint (type: int), ctinyint (type: tinyint) outputColumnNames: _col0, _col1 @@ -68,7 +68,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 4] - Statistics: Num rows: 3073 Data size: 23976 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1024 Data size: 8192 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -76,7 +76,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 3073 Data size: 23976 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1024 Data size: 8192 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: tinyint) Execution mode: vectorized @@ -105,19 +105,19 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 3073 Data size: 23976 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1024 Data size: 8192 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -173,7 +173,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: non_string_part - Statistics: Num rows: 3073 Data size: 888298 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3073 Data size: 313446 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Filter Operator @@ -182,7 +182,7 @@ STAGE PLANS: native: true predicateExpression: FilterLongColGreaterLongScalar(col 0:int, val 0) predicate: (cint > 0) (type: boolean) - Statistics: Num rows: 3073 Data size: 888298 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1024 Data size: 104448 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cint (type: int), cstring1 (type: string) outputColumnNames: _col0, _col1 @@ -190,7 +190,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 3073 Data size: 888298 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1024 Data size: 104448 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ @@ -198,7 +198,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No 
DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 3073 Data size: 888298 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1024 Data size: 104448 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: vectorized Map Vectorization: @@ -226,19 +226,19 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 3073 Data size: 888298 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1024 Data size: 104448 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 10 Data size: 2890 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 10 Data size: 2890 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/truncate_column.q.out ql/src/test/results/clientpositive/truncate_column.q.out index 3a3ea8c9a4..8c00c57289 100644 --- ql/src/test/results/clientpositive/truncate_column.q.out +++ ql/src/test/results/clientpositive/truncate_column.q.out @@ -35,7 +35,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 10 rawDataSize 94 @@ -292,7 +292,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} #### A masked pattern was here #### numFiles 1 numRows 10 @@ -486,7 +486,7 @@ Database: default Table: test_tab_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 10 rawDataSize 94 diff --git ql/src/test/results/clientpositive/udf1.q.out ql/src/test/results/clientpositive/udf1.q.out index b59649be8b..27dbfa47d9 100644 --- ql/src/test/results/clientpositive/udf1.q.out +++ ql/src/test/results/clientpositive/udf1.q.out @@ -64,6 +64,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: string), _col18 (type: string), _col19 (type: string) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, 
c16, c17, c18, c19, c20 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll'), compute_stats(c10, 'hll'), compute_stats(c11, 'hll'), compute_stats(c12, 'hll'), compute_stats(c13, 'hll'), compute_stats(c14, 'hll'), compute_stats(c15, 'hll'), compute_stats(c16, 'hll'), compute_stats(c17, 'hll'), compute_stats(c18, 'hll'), compute_stats(c19, 'hll'), compute_stats(c20, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 + Statistics: Num rows: 1 Data size: 8800 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8800 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct), _col9 (type: struct), _col10 (type: struct), _col11 (type: struct), _col12 (type: struct), _col13 (type: struct), _col14 (type: struct), _col15 (type: struct), _col16 (type: struct), _col17 (type: struct), _col18 (type: struct), _col19 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6), compute_stats(VALUE._col7), compute_stats(VALUE._col8), compute_stats(VALUE._col9), compute_stats(VALUE._col10), compute_stats(VALUE._col11), compute_stats(VALUE._col12), compute_stats(VALUE._col13), compute_stats(VALUE._col14), compute_stats(VALUE._col15), compute_stats(VALUE._col16), compute_stats(VALUE._col17), compute_stats(VALUE._col18), compute_stats(VALUE._col19) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 + Statistics: Num rows: 1 Data size: 8800 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8800 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -87,6 +113,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16, c17, c18, c19, c20 + Column Types: string, string, string, string, string, string, string, string, string, string, string, string, string, string, string, string, string, string, string, string + Table: default.dest1 Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/udf3.q.out ql/src/test/results/clientpositive/udf3.q.out index e52a827862..9f9f56fe02 100644 --- ql/src/test/results/clientpositive/udf3.q.out +++ ql/src/test/results/clientpositive/udf3.q.out @@ -55,6 +55,26 @@ STAGE PLANS: output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string) + outputColumnNames: c1, c2, c3, c4, c5 + Statistics: Num rows: 1 Data size: 920 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll') + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -69,6 +89,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: c1, c2, c3, c4, c5 + Column Types: string, string, string, string, string + Table: default.dest1 PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT count(CAST('' AS INT)), sum(CAST('' AS INT)), avg(CAST('' AS INT)), min(CAST('' AS INT)), max(CAST('' AS INT)) diff --git ql/src/test/results/clientpositive/udf_10_trims.q.out ql/src/test/results/clientpositive/udf_10_trims.q.out index d15dbc219e..4f08014ee2 100644 --- ql/src/test/results/clientpositive/udf_10_trims.q.out +++ ql/src/test/results/clientpositive/udf_10_trims.q.out @@ -50,6 +50,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: c1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll') + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -73,6 +99,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: c1 + Column Types: string + Table: default.dest1 Stage: Stage-3 Map Reduce diff --git 
ql/src/test/results/clientpositive/udf_character_length.q.out ql/src/test/results/clientpositive/udf_character_length.q.out index 47ed6787ff..375a86f8c7 100644 --- ql/src/test/results/clientpositive/udf_character_length.q.out +++ ql/src/test/results/clientpositive/udf_character_length.q.out @@ -71,6 +71,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: len + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(len, 'hll') + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -94,6 +120,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: len + Column Types: int + Table: default.dest1 Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/udf_length.q.out ql/src/test/results/clientpositive/udf_length.q.out index e21df147b2..0c7d952a0c 100644 --- ql/src/test/results/clientpositive/udf_length.q.out +++ ql/src/test/results/clientpositive/udf_length.q.out @@ -54,6 +54,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: len + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(len, 'hll') + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -77,6 +103,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: len + Column Types: int + Table: default.dest1 Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/udf_octet_length.q.out 
ql/src/test/results/clientpositive/udf_octet_length.q.out index 4d742b1bc0..220ccfb34e 100644 --- ql/src/test/results/clientpositive/udf_octet_length.q.out +++ ql/src/test/results/clientpositive/udf_octet_length.q.out @@ -54,6 +54,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: len + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(len, 'hll') + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -77,6 +103,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: len + Column Types: int + Table: default.dest1 Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/udf_reverse.q.out ql/src/test/results/clientpositive/udf_reverse.q.out index 316780e89e..abe326c9d6 100644 --- ql/src/test/results/clientpositive/udf_reverse.q.out +++ ql/src/test/results/clientpositive/udf_reverse.q.out @@ -54,6 +54,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: len + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(len, 'hll') + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -77,6 +103,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: len + Column Types: string + Table: default.dest1 Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/union10.q.out ql/src/test/results/clientpositive/union10.q.out index f5e2b25801..0bb9ccd1e5 100644 
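
Note on the recurring pattern in the hunks above and below: each INSERT OVERWRITE plan now carries a stats-gathering branch — a Select Operator re-projecting the written columns, a map-side Group By Operator evaluating compute_stats(col, 'hll') in hash mode ('hll' selects HyperLogLog-based NDV estimation), a Reduce Output Operator with empty sort order (all partial sketches go to a single reducer), and a mergepartial Group By whose one output row feeds the new "Column Stats Desc" under the Stats Work stage. A minimal HiveQL sketch to reproduce a plan of this shape; the table and column names are illustrative, not taken from any single test here:

-- Enable automatic column statistics collection (the behavior exercised by these golden files).
SET hive.stats.column.autogather=true;

-- Illustrative target table; any INSERT OVERWRITE destination shows the same pattern.
CREATE TABLE dest1 (len INT);

-- EXPLAIN shows the extra compute_stats branch and the Column Stats Desc under Stats Work.
EXPLAIN
FROM src INSERT OVERWRITE TABLE dest1 SELECT length(src.value);

-- After the insert runs, the merged statistics are persisted in the metastore:
DESCRIBE FORMATTED dest1 len;

-- Manual equivalent when autogather is disabled:
ANALYZE TABLE dest1 COMPUTE STATISTICS FOR COLUMNS;

This also explains the STAGE DEPENDENCIES churn in the multi-insert tests (union17, union18, union19, union31): each destination table gets its own merge MapReduce stage, so the stages are renumbered and each table's Stats Work gains its own Column Stats Desc. The union22 hunks show the visible metastore effect: COLUMN_STATS_ACCURATE on the written partition grows per-column "true" flags alongside BASIC_STATS.
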
--- ql/src/test/results/clientpositive/union10.q.out +++ ql/src/test/results/clientpositive/union10.q.out @@ -88,6 +88,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan Union Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE @@ -103,6 +116,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan Union Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE @@ -118,6 +144,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -141,6 +193,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, int + Table: default.tmptable Stage: Stage-4 Map Reduce diff --git 
ql/src/test/results/clientpositive/union12.q.out ql/src/test/results/clientpositive/union12.q.out index a6cd633e3b..93feadf454 100644 --- ql/src/test/results/clientpositive/union12.q.out +++ ql/src/test/results/clientpositive/union12.q.out @@ -88,6 +88,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan Union Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE @@ -103,6 +116,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan Union Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE @@ -118,6 +144,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -141,6 +193,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + 
Columns: key, value + Column Types: string, int + Table: default.tmptable Stage: Stage-4 Map Reduce diff --git ql/src/test/results/clientpositive/union17.q.out ql/src/test/results/clientpositive/union17.q.out index d997c29444..828ab0b93f 100644 --- ql/src/test/results/clientpositive/union17.q.out +++ ql/src/test/results/clientpositive/union17.q.out @@ -32,10 +32,12 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-0, Stage-5, Stage-8 Stage-5 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-5 - Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-1, Stage-5, Stage-8 + Stage-6 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-6 + Stage-8 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-2 @@ -152,6 +154,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -166,12 +183,46 @@ STAGE PLANS: Stage: Stage-4 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest1 Stage: Stage-5 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-7 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val1, val2 + Column Types: string, string, string + Table: default.dest2 + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) @@ -195,6 +246,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 1 Data 
size: 456 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -206,9 +272,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-6 - Stats Work - Basic Stats Work: + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 UNION ALL diff --git ql/src/test/results/clientpositive/union18.q.out ql/src/test/results/clientpositive/union18.q.out index 5f737f68fa..1e7473ec27 100644 --- ql/src/test/results/clientpositive/union18.q.out +++ ql/src/test/results/clientpositive/union18.q.out @@ -34,17 +34,13 @@ STAGE DEPENDENCIES: Stage-9 depends on stages: Stage-3 , consists of Stage-6, Stage-5, Stage-7 Stage-6 Stage-0 depends on stages: Stage-6, Stage-5, Stage-8 - Stage-4 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-0, Stage-11 Stage-5 Stage-7 Stage-8 depends on stages: Stage-7 - Stage-15 depends on stages: Stage-3 , consists of Stage-12, Stage-11, Stage-13 - Stage-12 - Stage-1 depends on stages: Stage-12, Stage-11, Stage-14 - Stage-10 depends on stages: Stage-1 - Stage-11 - Stage-13 - Stage-14 depends on stages: Stage-13 + Stage-1 depends on stages: Stage-3 + Stage-10 depends on stages: Stage-1, Stage-11 + Stage-11 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-2 @@ -96,6 +92,19 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 501 Data size: 136272 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct) + Select Operator expressions: _col0 (type: string), _col1 (type: string), _col1 (type: string) 
outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: PARTIAL @@ -107,6 +116,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe TableScan alias: s2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -125,6 +149,19 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 501 Data size: 136272 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct) + Select Operator expressions: _col0 (type: string), _col1 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: PARTIAL @@ -136,6 +173,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-9 Conditional Operator @@ -159,6 +224,10 @@ STAGE PLANS: Stage: Stage-4 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest1 Stage: Stage-5 Map Reduce @@ -190,15 +259,6 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-15 - Conditional Operator - - Stage: Stage-12 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-1 Move Operator tables: @@ -212,36 +272,32 @@ STAGE PLANS: Stage: Stage-10 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, val1, val2 + Column Types: string, string, string + Table: default.dest2 Stage: Stage-11 Map Reduce Map Operator Tree: TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - - Stage: Stage-13 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - - Stage: Stage-14 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 UNION ALL diff --git ql/src/test/results/clientpositive/union19.q.out ql/src/test/results/clientpositive/union19.q.out index e4ff0d5b74..fa0e7dfb8d 100644 --- ql/src/test/results/clientpositive/union19.q.out +++ ql/src/test/results/clientpositive/union19.q.out @@ -32,9 +32,11 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-0, Stage-5, Stage-7 + Stage-5 depends on stages: Stage-3 + Stage-6 depends on stages: Stage-1, Stage-5, Stage-7 Stage-1 depends on stages: Stage-3 - Stage-5 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-2 @@ -101,6 +103,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + 
Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe TableScan alias: s2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -134,6 +151,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -153,6 +185,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -167,6 +214,40 @@ STAGE PLANS: Stage: Stage-4 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest1 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val1, val2 + Column Types: string, string, string + Table: default.dest2 Stage: Stage-1 Move Operator @@ -178,9 +259,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-5 - Stats Work - Basic Stats Work: + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 UNION ALL diff --git ql/src/test/results/clientpositive/union22.q.out ql/src/test/results/clientpositive/union22.q.out index e4b4a0914b..ce4fa62359 100644 --- ql/src/test/results/clientpositive/union22.q.out +++ ql/src/test/results/clientpositive/union22.q.out @@ -97,7 +97,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"k0":"true","k1":"true","k2":"true","k3":"true","k4":"true","k5":"true"}} bucket_count -1 column.name.delimiter , columns k0,k1,k2,k3,k4,k5 @@ -225,7 +225,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"k1":"true","k2":"true","k3":"true","k4":"true"}} bucket_count -1 column.name.delimiter , columns k1,k2,k3,k4 @@ -272,7 +272,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"k0":"true","k1":"true","k2":"true","k3":"true","k4":"true","k5":"true"}} bucket_count -1 column.name.delimiter , columns k0,k1,k2,k3,k4,k5 @@ -361,6 +361,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), '2' (type: string) + outputColumnNames: k1, k2, k3, k4, ds + Statistics: Num rows: 348 Data size: 9684 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(k1, 'hll'), compute_stats(k2, 'hll'), compute_stats(k3, 'hll'), compute_stats(k4, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 348 Data size: 9684 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 348 Data size: 9684 Basic stats: COMPLETE Column stats: NONE 
+ tag: -1 + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + auto parallelism: false TableScan GatherStats: false Union @@ -395,6 +414,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), '2' (type: string) + outputColumnNames: k1, k2, k3, k4, ds + Statistics: Num rows: 348 Data size: 9684 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(k1, 'hll'), compute_stats(k2, 'hll'), compute_stats(k3, 'hll'), compute_stats(k4, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 348 Data size: 9684 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 348 Data size: 9684 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -428,7 +466,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"k0":"true","k1":"true","k2":"true","k3":"true","k4":"true","k5":"true"}} bucket_count -1 column.name.delimiter , columns k0,k1,k2,k3,k4,k5 @@ -470,6 +508,40 @@ STAGE PLANS: Truncated Path -> Alias: /dst_union22_delta/ds=1 [null-subquery1:$hdt$_0-subquery1:dst_union22_delta] #### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 174 Data size: 4842 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 174 Data size: 4842 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 174 Data size: 4842 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4 + columns.types struct:struct:struct:struct:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -502,6 +574,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: k1, k2, k3, k4 + Column Types: string, string, string, string + Table: default.dst_union22 + Is Table 
Level Stats: false Stage: Stage-4 Map Reduce @@ -559,7 +636,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"k1":"true","k2":"true","k3":"true","k4":"true"}} bucket_count -1 column.name.delimiter , columns k1,k2,k3,k4 @@ -606,7 +683,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"k0":"true","k1":"true","k2":"true","k3":"true","k4":"true","k5":"true"}} bucket_count -1 column.name.delimiter , columns k0,k1,k2,k3,k4,k5 diff --git ql/src/test/results/clientpositive/union28.q.out ql/src/test/results/clientpositive/union28.q.out index 9b203f5461..f6b4b1c95c 100644 --- ql/src/test/results/clientpositive/union28.q.out +++ ql/src/test/results/clientpositive/union28.q.out @@ -102,6 +102,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan Union Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE @@ -117,6 +130,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan Union Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE @@ -132,6 +158,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: 
compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -155,6 +207,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.union_subq_union Stage: Stage-4 Map Reduce diff --git ql/src/test/results/clientpositive/union29.q.out ql/src/test/results/clientpositive/union29.q.out index b933426d3f..0eea64f3d6 100644 --- ql/src/test/results/clientpositive/union29.q.out +++ ql/src/test/results/clientpositive/union29.q.out @@ -67,6 +67,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -88,6 +101,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -109,6 +135,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value 
expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -132,6 +184,10 @@ STAGE PLANS: Stage: Stage-2 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.union_subq_union Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/union30.q.out ql/src/test/results/clientpositive/union30.q.out index 3ee716bffa..76b68a6e37 100644 --- ql/src/test/results/clientpositive/union30.q.out +++ ql/src/test/results/clientpositive/union30.q.out @@ -116,6 +116,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -137,6 +150,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan Union Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE @@ -152,6 +178,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce 
Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan Union Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE @@ -167,6 +206,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -190,6 +255,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.union_subq_union Stage: Stage-4 Map Reduce diff --git ql/src/test/results/clientpositive/union31.q.out ql/src/test/results/clientpositive/union31.q.out index 4edaed63b8..6de0b40f29 100644 --- ql/src/test/results/clientpositive/union31.q.out +++ ql/src/test/results/clientpositive/union31.q.out @@ -71,10 +71,12 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1, Stage-4, Stage-7 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -181,6 +183,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t3 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -195,12 
+212,46 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, cnt + Column Types: string, int + Table: default.t3 Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: value, cnt + Column Types: string, int + Table: default.t4 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) @@ -225,6 +276,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t4 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: value, cnt + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(value, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -236,9 +302,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t4 - Stage: Stage-5 - Stats Work - Basic Stats Work: + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: from (select * from t1 @@ -342,12 +426,14 @@ insert overwrite table t6 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-3 depends on stages: Stage-2, Stage-6 + Stage-3 depends on stages: Stage-2, Stage-8 Stage-0 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-0, Stage-5, 
Stage-7
+  Stage-5 depends on stages: Stage-3
+  Stage-6 depends on stages: Stage-1, Stage-5, Stage-7
   Stage-1 depends on stages: Stage-3
-  Stage-5 depends on stages: Stage-1
-  Stage-6 is a root stage
+  Stage-7 depends on stages: Stage-3
+  Stage-8 is a root stage
 
 STAGE PLANS:
   Stage: Stage-2
@@ -428,6 +514,21 @@ STAGE PLANS:
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                     name: default.t5
+              Select Operator
+                expressions: _col0 (type: string), _col1 (type: int)
+                outputColumnNames: c1, cnt
+                Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: compute_stats(c1, 'hll'), compute_stats(cnt, 'hll')
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
         Group By Operator
           aggregations: sum(VALUE._col0)
           keys: KEY._col0 (type: string)
@@ -446,6 +547,21 @@ STAGE PLANS:
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                     name: default.t6
+              Select Operator
+                expressions: _col0 (type: string), _col1 (type: int)
+                outputColumnNames: c1, cnt
+                Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: compute_stats(c1, 'hll'), compute_stats(cnt, 'hll')
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -460,6 +576,40 @@ STAGE PLANS:
   Stage: Stage-4
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: c1, cnt
+          Column Types: string, int
+          Table: default.t5
+
+  Stage: Stage-5
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-6
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: c1, cnt
+          Column Types: string, int
+          Table: default.t6
 
   Stage: Stage-1
     Move Operator
@@ -471,11 +621,29 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.t6
 
-  Stage: Stage-5
-    Stats Work
-      Basic Stats Work:
+  Stage: Stage-7
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
-  Stage: Stage-6
+  Stage: Stage-8
     Map Reduce
       Map Operator Tree:
           TableScan
@@ -657,9 +825,11 @@ STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-3 depends on stages: Stage-2
   Stage-0 depends on stages: Stage-3
-  Stage-4 depends on stages: Stage-0
+  Stage-4 depends on stages: Stage-0, Stage-5, Stage-7
+  Stage-5 depends on stages: Stage-3
+  Stage-6 depends on stages: Stage-1, Stage-5, Stage-7
   Stage-1 depends on stages: Stage-3
-  Stage-5 depends on stages: Stage-1
+  Stage-7 depends on stages: Stage-3
 
 STAGE PLANS:
   Stage: Stage-2
@@ -756,6 +926,21 @@ STAGE PLANS:
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                     name: default.t7
+              Select Operator
+                expressions: _col0 (type: string), _col1 (type: int)
+                outputColumnNames: c1, cnt
+                Statistics: Num rows: 5 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: compute_stats(c1, 'hll'), compute_stats(cnt, 'hll')
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
         Group By Operator
           aggregations: count(1)
           keys: KEY._col0 (type: string)
@@ -774,6 +959,21 @@ STAGE PLANS:
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                     name: default.t8
+              Select Operator
+                expressions: _col0 (type: string), _col1 (type: int)
+                outputColumnNames: c1, cnt
+                Statistics: Num rows: 5 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: compute_stats(c1, 'hll'), compute_stats(cnt, 'hll')
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -788,6 +988,40 @@ STAGE PLANS:
   Stage: Stage-4
     Stats Work
      Basic Stats Work:
+      Column Stats Desc:
+          Columns: c1, cnt
+          Column Types: string, int
+          Table: default.t7
+
+  Stage: Stage-5
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-6
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: c1, cnt
+          Column Types: string, int
+          Table: default.t8
 
   Stage: Stage-1
     Move Operator
@@ -799,9 +1033,27 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.t8
 
-  Stage: Stage-5
-    Stats Work
-      Basic Stats Work:
+  Stage: Stage-7
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: from (
diff --git ql/src/test/results/clientpositive/union33.q.out ql/src/test/results/clientpositive/union33.q.out
index 2709d99784..d12db7951b 100644
--- ql/src/test/results/clientpositive/union33.q.out
+++ ql/src/test/results/clientpositive/union33.q.out
@@ -124,6 +124,19 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.test_src
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order:
+                  Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: struct), _col1 (type: struct)
           TableScan
             Union
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -135,6 +148,32 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.test_src
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order:
+                  Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: final
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-8
     Conditional Operator
@@ -158,6 +197,10 @@
   Stage: Stage-3
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.test_src
 
   Stage: Stage-4
     Map Reduce
@@ -331,6 +374,19 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.test_src
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order:
+                  Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: struct), _col1 (type: struct)
           TableScan
             alias: src
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -351,6 +407,32 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.test_src
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order:
+                  Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: final
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-9
     Conditional Operator
@@ -374,6 +456,10 @@
   Stage: Stage-4
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.test_src
 
   Stage: Stage-5
     Map Reduce
diff --git ql/src/test/results/clientpositive/union4.q.out ql/src/test/results/clientpositive/union4.q.out
index cdaa78feb5..948b2b0d4b 100644
--- ql/src/test/results/clientpositive/union4.q.out
+++ ql/src/test/results/clientpositive/union4.q.out
@@ -83,6 +83,19 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.tmptable
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: int)
+              outputColumnNames: key, value
+              Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  sort order:
+                  Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE
+                  value expressions: _col0 (type: struct), _col1 (type: struct)
           TableScan
             Union
              Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE
@@ -98,6 +111,32 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.tmptable
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: int)
+              outputColumnNames: key, value
+              Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  sort order:
+                  Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE
+                  value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-8
     Conditional Operator
@@ -121,6 +160,10 @@
   Stage: Stage-3
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, int
+          Table: default.tmptable
 
   Stage: Stage-4
     Map Reduce
diff --git ql/src/test/results/clientpositive/union6.q.out ql/src/test/results/clientpositive/union6.q.out
index f7a578c654..ddfd54fd2e 100644
--- ql/src/test/results/clientpositive/union6.q.out
+++ ql/src/test/results/clientpositive/union6.q.out
@@ -78,6 +78,19 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.tmptable
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 26 Data size: 7072 Basic stats: COMPLETE Column stats: PARTIAL
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL
+                Reduce Output Operator
+                  sort order:
+                  Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL
+                  value expressions: _col0 (type: struct), _col1 (type: struct)
           TableScan
             alias: s2
             Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
@@ -95,6 +108,32 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.tmptable
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 26 Data size: 7072 Basic stats: COMPLETE Column stats: PARTIAL
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL
+                Reduce Output Operator
+                  sort order:
+                  Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL
+                  value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-8
     Conditional Operator
@@ -118,6 +157,10 @@
   Stage: Stage-3
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.tmptable
 
   Stage: Stage-4
     Map Reduce
diff --git ql/src/test/results/clientpositive/union_lateralview.q.out ql/src/test/results/clientpositive/union_lateralview.q.out
index c45bf7580f..6cd7ac4860 100644
--- ql/src/test/results/clientpositive/union_lateralview.q.out
+++ ql/src/test/results/clientpositive/union_lateralview.q.out
@@ -47,7 +47,8 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-1
@@ -178,6 +179,21 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.test_union_lateral_view
+                  Select Operator
+                    expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string)
+                    outputColumnNames: key, arr_ele, value
+                    Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: compute_stats(key, 'hll'), compute_stats(arr_ele, 'hll'), compute_stats(value, 'hll')
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        table:
+                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -192,6 +208,32 @@
   Stage: Stage-2
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, arr_ele, value
+          Column Types: int, int, string
+          Table: default.test_union_lateral_view
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: INSERT OVERWRITE TABLE test_union_lateral_view
 SELECT b.key, d.arr_ele, d.value
diff --git ql/src/test/results/clientpositive/union_stats.q.out ql/src/test/results/clientpositive/union_stats.q.out
index 6585339adc..5b6ada76e9 100644
--- ql/src/test/results/clientpositive/union_stats.q.out
+++ ql/src/test/results/clientpositive/union_stats.q.out
@@ -484,7 +484,7 @@ Retention:          	0
 #### A masked pattern was here ####
 Table Type:         	MANAGED_TABLE
 Table Parameters:
-	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\"}
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
 	numFiles            	1
 	numRows             	1000
 	rawDataSize         	10624
@@ -518,7 +518,7 @@ Retention:          	0
 #### A masked pattern was here ####
 Table Type:         	MANAGED_TABLE
 Table Parameters:
-	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\"}
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
 	numFiles            	1
 	numRows             	1000
 	rawDataSize         	10624
diff --git ql/src/test/results/clientpositive/vector_groupby4.q.out ql/src/test/results/clientpositive/vector_groupby4.q.out
index d870e84969..443385b9f3 100644
--- ql/src/test/results/clientpositive/vector_groupby4.q.out
+++ ql/src/test/results/clientpositive/vector_groupby4.q.out
@@ -34,7 +34,9 @@ STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-2 depends on stages: Stage-1
   Stage-0 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-0, Stage-5
+  Stage-4 depends on stages: Stage-2
+  Stage-5 depends on stages: Stage-4
 
 STAGE PLANS:
   Stage: Stage-1
@@ -135,6 +137,16 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
                 serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
                 name: default.dest1
+          Select Operator
+            expressions: _col0 (type: string)
+            outputColumnNames: c1
+            Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -149,6 +161,96 @@ STAGE PLANS:
   Stage: Stage-3
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: c1
+          Column Types: string
+          Table: default.dest1
+
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            TableScan Vectorization:
+                native: true
+            Reduce Output Operator
+              sort order:
+              Map-reduce partition columns: rand() (type: double)
+              Reduce Sink Vectorization:
+                  className: VectorReduceSinkOperator
+                  native: false
+                  nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                  nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+              Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+              value expressions: c1 (type: string)
+      Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+          inputFormatFeatureSupport: []
+          featureSupportInUse: []
+          inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0, 'hll')
+          mode: partial1
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-5
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            TableScan Vectorization:
+                native: true
+            Reduce Output Operator
+              sort order:
+              Reduce Sink Vectorization:
+                  className: VectorReduceSinkOperator
+                  native: false
+                  nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                  nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+              Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct)
+      Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+          inputFormatFeatureSupport: []
+          featureSupportInUse: []
+          inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0)
+          mode: final
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: FROM srcorc
 INSERT OVERWRITE TABLE dest1 SELECT substr(srcorc.key,1,1) GROUP BY substr(srcorc.key,1,1)
diff --git ql/src/test/results/clientpositive/vector_groupby6.q.out ql/src/test/results/clientpositive/vector_groupby6.q.out
index 43f98d229b..af206f35a8 100644
--- ql/src/test/results/clientpositive/vector_groupby6.q.out
+++ ql/src/test/results/clientpositive/vector_groupby6.q.out
@@ -34,7 +34,9 @@ STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-2 depends on stages: Stage-1
   Stage-0 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-0, Stage-5
+  Stage-4 depends on stages: Stage-2
+  Stage-5 depends on stages: Stage-4
 
 STAGE PLANS:
   Stage: Stage-1
@@ -135,6 +137,16 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
                 serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
                 name: default.dest1
+          Select Operator
+            expressions: _col0 (type: string)
+            outputColumnNames: c1
+            Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -149,6 +161,96 @@ STAGE PLANS:
   Stage: Stage-3
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: c1
+          Column Types: string
+          Table: default.dest1
+
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            TableScan Vectorization:
+                native: true
+            Reduce Output Operator
+              sort order:
+              Map-reduce partition columns: rand() (type: double)
+              Reduce Sink Vectorization:
+                  className: VectorReduceSinkOperator
+                  native: false
+                  nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                  nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+              Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+              value expressions: c1 (type: string)
+      Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+          inputFormatFeatureSupport: []
+          featureSupportInUse: []
+          inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0, 'hll')
+          mode: partial1
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-5
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            TableScan Vectorization:
+                native: true
+            Reduce Output Operator
+              sort order:
+              Reduce Sink Vectorization:
+                  className: VectorReduceSinkOperator
+                  native: false
+                  nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                  nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+              Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct)
+      Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+          inputFormatFeatureSupport: []
+          featureSupportInUse: []
+          inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0)
+          mode: final
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: FROM srcorc
 INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(srcorc.value,5,1)
diff --git ql/src/test/results/clientpositive/vectorized_context.q.out ql/src/test/results/clientpositive/vectorized_context.q.out
index ad1b0ce7a9..539223bb21 100644
--- ql/src/test/results/clientpositive/vectorized_context.q.out
+++ ql/src/test/results/clientpositive/vectorized_context.q.out
@@ -109,14 +109,14 @@ STAGE PLANS:
   Stage: Stage-7
     Map Reduce Local Work
       Alias -> Map Local Tables:
-        $hdt$_0:household_demographics
+        $hdt$_1:household_demographics
           Fetch Operator
             limit: -1
         $hdt$_2:store
           Fetch Operator
             limit: -1
       Alias -> Map Local Operator Tree:
-        $hdt$_0:household_demographics
+        $hdt$_1:household_demographics
           TableScan
             alias: household_demographics
             Statistics: Num rows: 6075 Data size: 24300 Basic stats: COMPLETE Column stats: NONE
@@ -129,8 +129,8 @@
               Statistics: Num rows: 6075 Data size: 24300 Basic stats: COMPLETE Column stats: NONE
               HashTable Sink Operator
                 keys:
-                  0 _col0 (type: int)
-                  1 _col1 (type: int)
+                  0 _col1 (type: int)
+                  1 _col0 (type: int)
         $hdt$_2:store
           TableScan
             alias: store
@@ -144,7 +144,7 @@
               Statistics: Num rows: 6075 Data size: 615632 Basic stats: COMPLETE Column stats: NONE
               HashTable Sink Operator
                 keys:
-                  0 _col1 (type: int)
+                  0 _col0 (type: int)
                   1 _col0 (type: int)
 
   Stage: Stage-5
@@ -164,28 +164,28 @@
             condition map:
                  Inner Join 0 to 1
             keys:
-              0 _col0 (type: int)
-              1 _col1 (type: int)
-            outputColumnNames: _col1, _col3
-            Statistics: Num rows: 6682 Data size: 26730 Basic stats: COMPLETE Column stats: NONE
+              0 _col1 (type: int)
+              1 _col0 (type: int)
+            outputColumnNames: _col0, _col2
+            Statistics: Num rows: 6682 Data size: 80009 Basic stats: COMPLETE Column stats: NONE
             Map Join Operator
               condition map:
                    Inner Join 0 to 1
              keys:
-                0 _col1 (type: int)
+                0 _col0 (type: int)
                 1 _col0 (type: int)
-              outputColumnNames: _col3, _col5
-              Statistics: Num rows: 7350 Data size: 29403 Basic stats: COMPLETE Column stats: NONE
+              outputColumnNames: _col2, _col5
+              Statistics: Num rows: 7350 Data size: 88009 Basic stats: COMPLETE Column stats: NONE
               Select Operator
-                expressions: _col5 (type: string), _col3 (type: double)
+                expressions: _col5 (type: string), _col2 (type: double)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 7350 Data size: 29403 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 7350 Data size: 88009 Basic stats: COMPLETE Column stats: NONE
                 Limit
                   Number of rows: 100
-                  Statistics: Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 100 Data size: 1100 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
-                    Statistics: Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 100 Data size: 1100 Basic stats: COMPLETE Column stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat