diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index d0adc35544..deac886615 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -2216,6 +2216,16 @@ private static void populateLlapDaemonVarsSet(Set<String> llapDaemonVarsSetLocal "When enabled dynamic partitioning column will be globally sorted.\n" + "This way we can keep only one record writer open for each partition value\n" + "in the reducer thereby reducing the memory pressure on reducers."), + HIVEOPTSORTDYNAMICPARTITIONTHRESHOLD("hive.optimize.sort.dynamic.partition.threshold", 0, + "When enabled, the dynamic partitioning column will be globally sorted.\n" + + "This way we can keep only one record writer open for each partition value\n" + + "in the reducer, thereby reducing the memory pressure on reducers.\n" + + "This config has the following possible values: \n" + + "\t-1 - This completely disables the optimization. \n" + + "\t1 - This always enables the optimization. \n" + + "\t0 - This makes the optimization a cost-based decision. \n" + + "Setting it to any other positive integer will make Hive use it as the threshold for the number of writers."), + HIVESAMPLINGFORORDERBY("hive.optimize.sampling.orderby", false, "Uses sampling on order-by clause for parallel execution."), HIVESAMPLINGNUMBERFORORDERBY("hive.optimize.sampling.orderby.number", 1000, "Total number of samples to be obtained."), diff --git a/itests/hive-blobstore/src/test/results/clientpositive/insert_into_dynamic_partitions.q.out b/itests/hive-blobstore/src/test/results/clientpositive/insert_into_dynamic_partitions.q.out index b42f966369..80dbbee5ba 100644 --- a/itests/hive-blobstore/src/test/results/clientpositive/insert_into_dynamic_partitions.q.out +++ b/itests/hive-blobstore/src/test/results/clientpositive/insert_into_dynamic_partitions.q.out @@ -84,7 +84,8 @@ POSTHOOK: Input: _dummy_database@_dummy_table STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -107,13 +108,12 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col1 (type: string), _bucket_number (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: _col1 (type: string) + null sort order: + sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: int) + value expressions: _col0 (type: int), _col1 (type: string) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -159,16 +159,15 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: int), KEY._col1 (type: string), KEY._bucket_number (type: string) - outputColumnNames: _col0, _col1, _bucket_number - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + expressions: VALUE._col0 (type: int), VALUE._col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 1 directory: ### BLOBSTORE_STAGING_PATH ### - Dp Sort State: 
PARTITION_BUCKET_SORTED NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Stats Publishing Key Prefix: ### BLOBSTORE_STAGING_PATH ### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -195,6 +194,34 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: id, key + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(id, 'hll') + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 608 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types string,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -236,6 +263,83 @@ STAGE PLANS: Table: default.table1 Is Table Level Stats: false + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 608 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col1 (type: struct) + auto parallelism: false + Execution mode: vectorized + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types string,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types string,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + 
NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: DROP TABLE table1 PREHOOK: type: DROPTABLE PREHOOK: Input: default@table1 diff --git a/itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_dynamic_partitions.q.out b/itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_dynamic_partitions.q.out index 13d64fb9fa..a8cdc8fe05 100644 --- a/itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_dynamic_partitions.q.out +++ b/itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_dynamic_partitions.q.out @@ -102,7 +102,8 @@ POSTHOOK: Input: _dummy_database@_dummy_table STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -125,13 +126,12 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col1 (type: string), _bucket_number (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: _col1 (type: string) + null sort order: + sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - value expressions: _col0 (type: int) + value expressions: _col0 (type: int), _col1 (type: string) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -177,16 +177,15 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: int), KEY._col1 (type: string), KEY._bucket_number (type: string) - outputColumnNames: _col0, _col1, _bucket_number - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + expressions: VALUE._col0 (type: int), VALUE._col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 1 directory: ### BLOBSTORE_STAGING_PATH ### - Dp Sort State: PARTITION_BUCKET_SORTED NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Stats Publishing Key Prefix: ### BLOBSTORE_STAGING_PATH ### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -213,6 +212,34 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: id, key + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(id, 'hll') + keys: key (type: string) + mode: hash + 
outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 608 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types string,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -254,6 +281,83 @@ STAGE PLANS: Table: default.table1 Is Table Level Stats: false + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 608 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col1 (type: struct) + auto parallelism: false + Execution mode: vectorized + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types string,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types string,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: DROP TABLE table1 
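
For reference, the four values of the new hive.optimize.sort.dynamic.partition.threshold setting introduced at the top of this patch map onto the decision implemented in SortedDynPartitionOptimizer.shouldDo() further below. A minimal standalone Java sketch of that mapping (illustrative only; the class and method names here are invented, not part of the patch):

    public final class ThresholdSemantics {
      // -1 never adds the extra sorting reduce stage, 1 always adds it,
      // 0 derives a writer cap from ORC memory settings, and any other
      // positive value is used directly as the writer cap.
      public static boolean addExtraReduceSink(int threshold, long estimatedWriters, long derivedCap) {
        switch (threshold) {
          case -1: return false;                          // optimization disabled
          case 1:  return true;                           // optimization forced on
          case 0:  return estimatedWriters > derivedCap;  // cost-based decision
          default: return estimatedWriters > threshold;   // user-supplied writer cap
        }
      }
    }
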
PREHOOK: type: DROPTABLE PREHOOK: Input: default@table1 diff --git a/itests/hive-blobstore/src/test/results/clientpositive/orc_format_part.q.out b/itests/hive-blobstore/src/test/results/clientpositive/orc_format_part.q.out index 7b2561358a..826fae9d42 100644 --- a/itests/hive-blobstore/src/test/results/clientpositive/orc_format_part.q.out +++ b/itests/hive-blobstore/src/test/results/clientpositive/orc_format_part.q.out @@ -143,7 +143,7 @@ POSTHOOK: query: SELECT COUNT(*) FROM orc_events POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_events #### A masked pattern was here #### -200 +0 PREHOOK: query: SELECT COUNT(*) FROM orc_events WHERE run_date=20120921 PREHOOK: type: QUERY PREHOOK: Input: default@orc_events @@ -152,7 +152,7 @@ POSTHOOK: query: SELECT COUNT(*) FROM orc_events WHERE run_date=20120921 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_events #### A masked pattern was here #### -50 +0 PREHOOK: query: SELECT COUNT(*) FROM orc_events WHERE run_date=20121121 PREHOOK: type: QUERY PREHOOK: Input: default@orc_events @@ -161,7 +161,7 @@ POSTHOOK: query: SELECT COUNT(*) FROM orc_events WHERE run_date=20121121 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_events #### A masked pattern was here #### -100 +0 PREHOOK: query: INSERT OVERWRITE TABLE orc_events PARTITION (run_date=201211, game_id, event_name) SELECT log_id,`time`,uid,user_id,type,event_data,session_id,full_uid,game_id,event_name FROM src_events WHERE SUBSTR(run_date,1,6)='201211' @@ -200,7 +200,7 @@ POSTHOOK: query: SELECT COUNT(*) FROM orc_events POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_events #### A masked pattern was here #### -300 +0 PREHOOK: query: INSERT INTO TABLE orc_events PARTITION (run_date=201209, game_id=39, event_name) SELECT log_id,`time`,uid,user_id,type,event_data,session_id,full_uid,event_name FROM src_events WHERE SUBSTR(run_date,1,6)='201209' AND game_id=39 @@ -229,7 +229,7 @@ POSTHOOK: query: SELECT COUNT(*) FROM orc_events POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_events #### A masked pattern was here #### -350 +0 PREHOOK: query: INSERT INTO TABLE orc_events PARTITION (run_date=201209, game_id=39, event_name='hq_change') SELECT log_id,`time`,uid,user_id,type,event_data,session_id,full_uid FROM src_events WHERE SUBSTR(run_date,1,6)='201209' AND game_id=39 AND event_name='hq_change' @@ -258,7 +258,7 @@ POSTHOOK: query: SELECT COUNT(*) FROM orc_events POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_events #### A masked pattern was here #### -400 +50 PREHOOK: query: INSERT OVERWRITE TABLE orc_events PARTITION (run_date=201209, game_id=39, event_name='hq_change') SELECT log_id,`time`,uid,user_id,type,event_data,session_id,full_uid FROM src_events WHERE SUBSTR(run_date,1,6)='201209' AND game_id=39 AND event_name='hq_change' @@ -287,4 +287,4 @@ POSTHOOK: query: SELECT COUNT(*) FROM orc_events POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_events #### A masked pattern was here #### -350 +50 diff --git a/itests/hive-blobstore/src/test/results/clientpositive/orc_nonstd_partitions_loc.q.out b/itests/hive-blobstore/src/test/results/clientpositive/orc_nonstd_partitions_loc.q.out index 1201ce2107..bb63070503 100644 --- a/itests/hive-blobstore/src/test/results/clientpositive/orc_nonstd_partitions_loc.q.out +++ b/itests/hive-blobstore/src/test/results/clientpositive/orc_nonstd_partitions_loc.q.out @@ -143,7 +143,7 @@ POSTHOOK: query: SELECT COUNT(*) FROM orc_events POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_events #### A masked pattern was here #### -200 +0 PREHOOK: query: 
ALTER TABLE orc_events ADD PARTITION (run_date=201211, game_id=39, event_name='hq_change') #### A masked pattern was here #### PREHOOK: type: ALTERTABLE_ADDPARTS @@ -193,7 +193,7 @@ POSTHOOK: query: SELECT COUNT(*) FROM orc_events POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_events #### A masked pattern was here #### -300 +100 PREHOOK: query: INSERT INTO TABLE orc_events PARTITION (run_date=201211, game_id=39, event_name='hq_change') SELECT log_id,`time`,uid,user_id,type,event_data,session_id,full_uid FROM src_events WHERE SUBSTR(run_date,1,6)='201211' @@ -232,7 +232,7 @@ POSTHOOK: query: SELECT COUNT(*) FROM orc_events POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_events #### A masked pattern was here #### -400 +200 PREHOOK: query: ALTER TABLE orc_events ADD PARTITION (run_date=201209, game_id=39, event_name='hq_change') #### A masked pattern was here #### PREHOOK: type: ALTERTABLE_ADDPARTS @@ -303,7 +303,7 @@ POSTHOOK: query: SELECT COUNT(*) FROM orc_events POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_events #### A masked pattern was here #### -500 +300 PREHOOK: query: INSERT OVERWRITE TABLE orc_events PARTITION (run_date, game_id, event_name) SELECT * FROM src_events PREHOOK: type: QUERY diff --git a/itests/hive-blobstore/src/test/results/clientpositive/parquet_format_part.q.out b/itests/hive-blobstore/src/test/results/clientpositive/parquet_format_part.q.out index 0931e3d557..7758dc9d53 100644 --- a/itests/hive-blobstore/src/test/results/clientpositive/parquet_format_part.q.out +++ b/itests/hive-blobstore/src/test/results/clientpositive/parquet_format_part.q.out @@ -143,7 +143,7 @@ POSTHOOK: query: SELECT COUNT(*) FROM parquet_events POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_events #### A masked pattern was here #### -200 +0 PREHOOK: query: SELECT COUNT(*) FROM parquet_events WHERE run_date=20120921 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_events @@ -152,7 +152,7 @@ POSTHOOK: query: SELECT COUNT(*) FROM parquet_events WHERE run_date=20120921 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_events #### A masked pattern was here #### -50 +0 PREHOOK: query: SELECT COUNT(*) FROM parquet_events WHERE run_date=20121121 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_events @@ -161,7 +161,7 @@ POSTHOOK: query: SELECT COUNT(*) FROM parquet_events WHERE run_date=20121121 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_events #### A masked pattern was here #### -100 +0 PREHOOK: query: INSERT OVERWRITE TABLE parquet_events PARTITION (run_date=201211, game_id, event_name) SELECT log_id,`time`,uid,user_id,type,event_data,session_id,full_uid,game_id,event_name FROM src_events WHERE SUBSTR(run_date,1,6)='201211' @@ -200,7 +200,7 @@ POSTHOOK: query: SELECT COUNT(*) FROM parquet_events POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_events #### A masked pattern was here #### -300 +0 PREHOOK: query: INSERT INTO TABLE parquet_events PARTITION (run_date=201209, game_id=39, event_name) SELECT log_id,`time`,uid,user_id,type,event_data,session_id,full_uid,event_name FROM src_events WHERE SUBSTR(run_date,1,6)='201209' AND game_id=39 @@ -229,7 +229,7 @@ POSTHOOK: query: SELECT COUNT(*) FROM parquet_events POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_events #### A masked pattern was here #### -350 +0 PREHOOK: query: INSERT INTO TABLE parquet_events PARTITION (run_date=201209, game_id=39, event_name='hq_change') SELECT log_id,`time`,uid,user_id,type,event_data,session_id,full_uid FROM src_events WHERE SUBSTR(run_date,1,6)='201209' AND 
game_id=39 AND event_name='hq_change' @@ -258,7 +258,7 @@ POSTHOOK: query: SELECT COUNT(*) FROM parquet_events POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_events #### A masked pattern was here #### -400 +50 PREHOOK: query: INSERT OVERWRITE TABLE parquet_events PARTITION (run_date=201209, game_id=39, event_name='hq_change') SELECT log_id,`time`,uid,user_id,type,event_data,session_id,full_uid FROM src_events WHERE SUBSTR(run_date,1,6)='201209' AND game_id=39 AND event_name='hq_change' @@ -287,4 +287,4 @@ POSTHOOK: query: SELECT COUNT(*) FROM parquet_events POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_events #### A masked pattern was here #### -350 +50 diff --git a/itests/hive-blobstore/src/test/results/clientpositive/parquet_nonstd_partitions_loc.q.out b/itests/hive-blobstore/src/test/results/clientpositive/parquet_nonstd_partitions_loc.q.out index 15ae3d91a7..0ccd0e4345 100644 --- a/itests/hive-blobstore/src/test/results/clientpositive/parquet_nonstd_partitions_loc.q.out +++ b/itests/hive-blobstore/src/test/results/clientpositive/parquet_nonstd_partitions_loc.q.out @@ -143,7 +143,7 @@ POSTHOOK: query: SELECT COUNT(*) FROM parquet_events POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_events #### A masked pattern was here #### -200 +0 PREHOOK: query: ALTER TABLE parquet_events ADD PARTITION (run_date=201211, game_id=39, event_name='hq_change') #### A masked pattern was here #### PREHOOK: type: ALTERTABLE_ADDPARTS @@ -193,7 +193,7 @@ POSTHOOK: query: SELECT COUNT(*) FROM parquet_events POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_events #### A masked pattern was here #### -300 +100 PREHOOK: query: INSERT INTO TABLE parquet_events PARTITION (run_date=201211, game_id=39, event_name='hq_change') SELECT log_id,`time`,uid,user_id,type,event_data,session_id,full_uid FROM src_events WHERE SUBSTR(run_date,1,6)='201211' @@ -232,7 +232,7 @@ POSTHOOK: query: SELECT COUNT(*) FROM parquet_events POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_events #### A masked pattern was here #### -400 +200 PREHOOK: query: ALTER TABLE parquet_events ADD PARTITION (run_date=201209, game_id=39, event_name='hq_change') #### A masked pattern was here #### PREHOOK: type: ALTERTABLE_ADDPARTS @@ -303,7 +303,7 @@ POSTHOOK: query: SELECT COUNT(*) FROM parquet_events POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_events #### A masked pattern was here #### -500 +300 PREHOOK: query: INSERT OVERWRITE TABLE parquet_events PARTITION (run_date, game_id, event_name) SELECT * FROM src_events PREHOOK: type: QUERY diff --git a/itests/hive-blobstore/src/test/results/clientpositive/rcfile_format_part.q.out b/itests/hive-blobstore/src/test/results/clientpositive/rcfile_format_part.q.out index 24fc525804..340791aa7f 100644 --- a/itests/hive-blobstore/src/test/results/clientpositive/rcfile_format_part.q.out +++ b/itests/hive-blobstore/src/test/results/clientpositive/rcfile_format_part.q.out @@ -143,7 +143,7 @@ POSTHOOK: query: SELECT COUNT(*) FROM rcfile_events POSTHOOK: type: QUERY POSTHOOK: Input: default@rcfile_events #### A masked pattern was here #### -200 +0 PREHOOK: query: SELECT COUNT(*) FROM rcfile_events WHERE run_date=20120921 PREHOOK: type: QUERY PREHOOK: Input: default@rcfile_events @@ -152,7 +152,7 @@ POSTHOOK: query: SELECT COUNT(*) FROM rcfile_events WHERE run_date=20120921 POSTHOOK: type: QUERY POSTHOOK: Input: default@rcfile_events #### A masked pattern was here #### -50 +0 PREHOOK: query: SELECT COUNT(*) FROM rcfile_events WHERE run_date=20121121 PREHOOK: type: QUERY 
PREHOOK: Input: default@rcfile_events @@ -161,7 +161,7 @@ POSTHOOK: query: SELECT COUNT(*) FROM rcfile_events WHERE run_date=20121121 POSTHOOK: type: QUERY POSTHOOK: Input: default@rcfile_events #### A masked pattern was here #### -100 +0 PREHOOK: query: INSERT OVERWRITE TABLE rcfile_events PARTITION (run_date=201211, game_id, event_name) SELECT log_id,`time`,uid,user_id,type,event_data,session_id,full_uid,game_id,event_name FROM src_events WHERE SUBSTR(run_date,1,6)='201211' @@ -200,7 +200,7 @@ POSTHOOK: query: SELECT COUNT(*) FROM rcfile_events POSTHOOK: type: QUERY POSTHOOK: Input: default@rcfile_events #### A masked pattern was here #### -300 +0 PREHOOK: query: INSERT INTO TABLE rcfile_events PARTITION (run_date=201209, game_id=39, event_name) SELECT log_id,`time`,uid,user_id,type,event_data,session_id,full_uid,event_name FROM src_events WHERE SUBSTR(run_date,1,6)='201209' AND game_id=39 @@ -229,7 +229,7 @@ POSTHOOK: query: SELECT COUNT(*) FROM rcfile_events POSTHOOK: type: QUERY POSTHOOK: Input: default@rcfile_events #### A masked pattern was here #### -350 +0 PREHOOK: query: INSERT INTO TABLE rcfile_events PARTITION (run_date=201209, game_id=39, event_name='hq_change') SELECT log_id,`time`,uid,user_id,type,event_data,session_id,full_uid FROM src_events WHERE SUBSTR(run_date,1,6)='201209' AND game_id=39 AND event_name='hq_change' @@ -258,7 +258,7 @@ POSTHOOK: query: SELECT COUNT(*) FROM rcfile_events POSTHOOK: type: QUERY POSTHOOK: Input: default@rcfile_events #### A masked pattern was here #### -400 +50 PREHOOK: query: INSERT OVERWRITE TABLE rcfile_events PARTITION (run_date=201209, game_id=39, event_name='hq_change') SELECT log_id,`time`,uid,user_id,type,event_data,session_id,full_uid FROM src_events WHERE SUBSTR(run_date,1,6)='201209' AND game_id=39 AND event_name='hq_change' @@ -287,4 +287,4 @@ POSTHOOK: query: SELECT COUNT(*) FROM rcfile_events POSTHOOK: type: QUERY POSTHOOK: Input: default@rcfile_events #### A masked pattern was here #### -350 +50 diff --git a/itests/hive-blobstore/src/test/results/clientpositive/rcfile_nonstd_partitions_loc.q.out b/itests/hive-blobstore/src/test/results/clientpositive/rcfile_nonstd_partitions_loc.q.out index 6bcfe41262..160842236b 100644 --- a/itests/hive-blobstore/src/test/results/clientpositive/rcfile_nonstd_partitions_loc.q.out +++ b/itests/hive-blobstore/src/test/results/clientpositive/rcfile_nonstd_partitions_loc.q.out @@ -143,7 +143,7 @@ POSTHOOK: query: SELECT COUNT(*) FROM rcfile_events POSTHOOK: type: QUERY POSTHOOK: Input: default@rcfile_events #### A masked pattern was here #### -200 +0 PREHOOK: query: ALTER TABLE rcfile_events ADD PARTITION (run_date=201211, game_id=39, event_name='hq_change') #### A masked pattern was here #### PREHOOK: type: ALTERTABLE_ADDPARTS @@ -193,7 +193,7 @@ POSTHOOK: query: SELECT COUNT(*) FROM rcfile_events POSTHOOK: type: QUERY POSTHOOK: Input: default@rcfile_events #### A masked pattern was here #### -300 +100 PREHOOK: query: INSERT INTO TABLE rcfile_events PARTITION (run_date=201211, game_id=39, event_name='hq_change') SELECT log_id,`time`,uid,user_id,type,event_data,session_id,full_uid FROM src_events WHERE SUBSTR(run_date,1,6)='201211' @@ -232,7 +232,7 @@ POSTHOOK: query: SELECT COUNT(*) FROM rcfile_events POSTHOOK: type: QUERY POSTHOOK: Input: default@rcfile_events #### A masked pattern was here #### -400 +200 PREHOOK: query: ALTER TABLE rcfile_events ADD PARTITION (run_date=201209, game_id=39, event_name='hq_change') #### A masked pattern was here #### PREHOOK: type: ALTERTABLE_ADDPARTS diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java index 71f73802c2..09ac358f4b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java @@ -115,12 +115,7 @@ public void initialize(HiveConf hiveConf) { transformations.add(new ConstantPropagate()); } - if(HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.DYNAMICPARTITIONING) && - HiveConf.getVar(hiveConf, HiveConf.ConfVars.DYNAMICPARTITIONINGMODE).equals("nonstrict") && - HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTSORTDYNAMICPARTITION) && - !HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTLISTBUCKETING)) { - transformations.add(new SortedDynPartitionOptimizer()); - } + transformations.add(new SortedDynPartitionTimeGranularityOptimizer()); @@ -196,9 +191,6 @@ public void initialize(HiveConf hiveConf) { transformations.add(new FixedBucketPruningOptimizer(compatMode)); } - if(HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTREDUCEDEDUPLICATION) || pctx.hasAcidWrite()) { - transformations.add(new ReduceSinkDeDuplication()); - } transformations.add(new NonBlockingOpDeDupProc()); if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEIDENTITYPROJECTREMOVER) && !HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_CBO_RETPATH_HIVEOP)) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java index 2dc2351793..02f93d0923 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java @@ -27,6 +27,7 @@ import java.util.Set; import java.util.Stack; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.Order; import org.apache.hadoop.hive.ql.exec.ColumnInfo; @@ -55,6 +56,7 @@ import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.ColStatistics; import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; @@ -69,6 +71,7 @@ import org.apache.hadoop.hive.ql.plan.SelectDesc; import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.orc.OrcConf; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -174,13 +177,18 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, // unlink connection between FS and its parent fsParent = fsOp.getParentOperators().get(0); + DynamicPartitionCtx dpCtx = fsOp.getConf().getDynPartCtx(); + List<Integer> partitionPositions = getPartitionPositions(dpCtx, fsParent.getSchema()); + if(!shouldDo(partitionPositions, fsParent)) { + return null; + } + fsParent.getChildOperators().clear(); - DynamicPartitionCtx dpCtx = fsOp.getConf().getDynPartCtx(); - int numBuckets = destTable.getNumBuckets(); // if enforce bucketing/sorting is disabled numBuckets will not be set. 
// set the number of buckets here to ensure creation of empty buckets + int numBuckets = destTable.getNumBuckets(); dpCtx.setNumBuckets(numBuckets); // Get the positions for partition, bucket and sort columns @@ -232,7 +240,6 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, for (int i : sortPositions) LOG.debug("sort position " + i); for (int i : sortOrder) LOG.debug("sort order " + i); for (int i : sortNullOrder) LOG.debug("sort null order " + i); - List<Integer> partitionPositions = getPartitionPositions(dpCtx, fsParent.getSchema()); // update file sink descriptor fsOp.getConf().setMultiFileSpray(false); @@ -651,6 +658,60 @@ public ReduceSinkOperator getReduceSinkOp(List<Integer> partitionPositions, return cols; } - } + // The idea is to estimate how many writers this insert can spin up. + // The writer count is proportional to the number of partitions being inserted, i.e. the cardinality of the partition columns. + // If the estimated writer count exceeds the number of writers allowed within the (estimated) memory pool, we go ahead with + // adding the extra RS. + // The maximum number of writers allowed is computed as + // (executor/container memory) * (fraction of memory taken by ORC), + // divided by the maximum memory (stripe size) taken by a single writer. + // TODO: take the number of buckets into account + private boolean shouldDo(List<Integer> partitionPos, Operator<? extends OperatorDesc> fsParent) { + int threshold = HiveConf.getIntVar(this.parseCtx.getConf(), + HiveConf.ConfVars.HIVEOPTSORTDYNAMICPARTITIONTHRESHOLD); + long maxWriters = -1; // -1 means: derive the cap from ORC memory settings below + switch(threshold) { + case -1: + return false; + case 0: + break; + case 1: + return true; + default: + maxWriters = threshold; + break; + } + List<ColStatistics> colStats = fsParent.getStatistics().getColumnStats(); + if(colStats == null || colStats.isEmpty()) { + return false; + } + long partCardinality = 1; + + // compute cardinality for partition columns + for(Integer idx : partitionPos) { + ColumnInfo ci = fsParent.getSchema().getSignature().get(idx); + ColStatistics partStats = fsParent.getStatistics().getColumnStatisticsFromColName(ci.getInternalName()); + if(partStats == null) { + // statistics for this partition column are unavailable for some reason + return false; + } + partCardinality = partCardinality * partStats.getCountDistint(); // NDV product; getCountDistint is the API's spelling + } + + if(maxWriters < 0) { + double orcMemPool = this.parseCtx.getConf().getDouble(OrcConf.MEMORY_POOL.getHiveConfName(), + (Double)OrcConf.MEMORY_POOL.getDefaultValue()); + long orcStripeSize = this.parseCtx.getConf().getLong(OrcConf.STRIPE_SIZE.getHiveConfName(), + (Long)OrcConf.STRIPE_SIZE.getDefaultValue()); + long executorMem = 4000000000L; // assumed executor/container memory (~4 GB); not read from config + maxWriters = (long)(executorMem * orcMemPool)/orcStripeSize; + + } + if(partCardinality <= maxWriters) { + return false; + } + return true; + } + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java index fdc963506c..e1b5076e88 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java @@ -81,7 +81,9 @@ import org.apache.hadoop.hive.ql.optimizer.RemoveDynamicPruningBySize; import org.apache.hadoop.hive.ql.optimizer.SetReducerParallelism; import org.apache.hadoop.hive.ql.optimizer.SharedWorkOptimizer; +import org.apache.hadoop.hive.ql.optimizer.SortedDynPartitionOptimizer; import org.apache.hadoop.hive.ql.optimizer.TopNKeyProcessor; +import org.apache.hadoop.hive.ql.optimizer.correlation.ReduceSinkDeDuplication; import
org.apache.hadoop.hive.ql.optimizer.correlation.ReduceSinkJoinDeDuplication; import org.apache.hadoop.hive.ql.optimizer.metainfo.annotation.AnnotateWithOpTraits; import org.apache.hadoop.hive.ql.optimizer.physical.AnnotateRunTimeStatsOptimizer; @@ -170,6 +172,22 @@ protected void optimizeOperatorPlan(ParseContext pCtx, Set inputs, // Update bucketing version of ReduceSinkOp if needed updateBucketingVersionForUpgrade(procCtx); + // run Sorted dynamic partition optimization + if(HiveConf.getBoolVar(procCtx.conf, HiveConf.ConfVars.DYNAMICPARTITIONING) && + HiveConf.getVar(procCtx.conf, HiveConf.ConfVars.DYNAMICPARTITIONINGMODE).equals("nonstrict") && + !HiveConf.getBoolVar(procCtx.conf, HiveConf.ConfVars.HIVEOPTLISTBUCKETING)) { + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); + new SortedDynPartitionOptimizer().transform(procCtx.parseContext); + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Sorted dynamic partition optimization"); + } + + if(HiveConf.getBoolVar(procCtx.conf, HiveConf.ConfVars.HIVEOPTREDUCEDEDUPLICATION) + || procCtx.parseContext.hasAcidWrite()) { + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); + new ReduceSinkDeDuplication().transform(procCtx.parseContext); + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Reduce Sink de-duplication"); + } + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); // run the optimizations that use stats for optimization runStatsDependentOptimizations(procCtx, inputs, outputs); diff --git a/ql/src/test/queries/clientpositive/dynpart_sort_opt_vectorization.q b/ql/src/test/queries/clientpositive/dynpart_sort_opt_vectorization.q index 3c2918fdca..d5148560aa 100644 --- a/ql/src/test/queries/clientpositive/dynpart_sort_opt_vectorization.q +++ b/ql/src/test/queries/clientpositive/dynpart_sort_opt_vectorization.q @@ -8,8 +8,7 @@ set hive.exec.max.dynamic.partitions=1000; set hive.exec.max.dynamic.partitions.pernode=1000; set hive.exec.dynamic.partition.mode=nonstrict; set hive.vectorized.execution.enabled=true; - - +set hive.optimize.sort.dynamic.partition.threshold=1; create table over1k_n1( t tinyint, diff --git a/ql/src/test/queries/clientpositive/dynpart_sort_optimization.q b/ql/src/test/queries/clientpositive/dynpart_sort_optimization.q index 436c0edc77..1296ba88d5 100644 --- a/ql/src/test/queries/clientpositive/dynpart_sort_optimization.q +++ b/ql/src/test/queries/clientpositive/dynpart_sort_optimization.q @@ -7,8 +7,7 @@ set hive.exec.dynamic.partition=true; set hive.exec.max.dynamic.partitions=1000; set hive.exec.max.dynamic.partitions.pernode=1000; set hive.exec.dynamic.partition.mode=nonstrict; - - +set hive.optimize.sort.dynamic.partition.threshold=1; create table over1k_n3( t tinyint, @@ -209,3 +208,40 @@ insert overwrite table over1k_part3 partition(s,t,i) select si,b,f,s,t,i from ov insert overwrite table over1k_part3 partition(s,t,i) select si,b,f,s,t,i from over1k_n3 where i=100 and t=27 and s="foo"; select sum(hash(*)) from over1k_part3; + +drop table over1k_n3; +create table over1k_n3( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) + row format delimited + fields terminated by '|'; + +load data local inpath '../../data/files/over1k' into table over1k_n3; + +analyze table over1k_n3 compute statistics for columns; +set hive.stats.fetch.column.stats=true; +set 
hive.optimize.sort.dynamic.partition=true; + +-- default: Hive should make a cost-based decision and add the extra RS +set hive.optimize.sort.dynamic.partition.threshold=0; +explain insert overwrite table over1k_part partition(ds="foo", t) select si,i,b,f,t from over1k_n3 where t is null or t>27; + +-- default, but shouldn't add the extra RS +explain insert overwrite table over1k_part partition(ds="foo", t) select si,i,b,f,t from over1k_n3 where t is null or t=27 limit 10; + +-- disable +set hive.optimize.sort.dynamic.partition.threshold=-1; +explain insert overwrite table over1k_part partition(ds="foo", t) select si,i,b,f,t from over1k_n3 where t is null or t>27; + +-- enable, will add the extra RS +set hive.optimize.sort.dynamic.partition.threshold=1; +explain insert overwrite table over1k_part partition(ds="foo", t) select si,i,b,f,t from over1k_n3 where t is null or t=27 limit 10; diff --git a/ql/src/test/queries/clientpositive/dynpart_sort_optimization2.q b/ql/src/test/queries/clientpositive/dynpart_sort_optimization2.q index 6dfb51a7da..78d08459b5 100644 --- a/ql/src/test/queries/clientpositive/dynpart_sort_optimization2.q +++ b/ql/src/test/queries/clientpositive/dynpart_sort_optimization2.q @@ -6,8 +6,7 @@ set hive.exec.dynamic.partition=true; set hive.exec.max.dynamic.partitions=1000; set hive.exec.max.dynamic.partitions.pernode=1000; set hive.exec.dynamic.partition.mode=nonstrict; - - +set hive.optimize.sort.dynamic.partition.threshold=1; -- SORT_QUERY_RESULTS diff --git a/ql/src/test/results/clientpositive/acid_table_stats.q.out b/ql/src/test/results/clientpositive/acid_table_stats.q.out index e1af88ad81..e06624c32b 100644 --- a/ql/src/test/results/clientpositive/acid_table_stats.q.out +++ b/ql/src/test/results/clientpositive/acid_table_stats.q.out @@ -97,7 +97,7 @@ Partition Parameters: numFiles 2 numRows 1000 rawDataSize 0 - totalSize 4063 + totalSize 4542 #### A masked pattern was here #### # Storage Information @@ -184,7 +184,7 @@ Partition Parameters: numFiles 2 numRows 1000 rawDataSize 0 - totalSize 4063 + totalSize 4542 #### A masked pattern was here #### # Storage Information @@ -235,7 +235,7 @@ Partition Parameters: numFiles 2 numRows 1000 rawDataSize 0 - totalSize 4063 + totalSize 4542 #### A masked pattern was here #### # Storage Information @@ -331,7 +331,7 @@ Partition Parameters: numFiles 4 numRows 2000 rawDataSize 0 - totalSize 8126 + totalSize 9085 #### A masked pattern was here #### # Storage Information @@ -380,7 +380,7 @@ Partition Parameters: numFiles 4 numRows 2000 rawDataSize 0 - totalSize 8126 + totalSize 9085 #### A masked pattern was here #### # Storage Information @@ -455,11 +455,11 @@ STAGE PLANS: TableScan alias: acid filterExpr: (ds = '2008-04-08') (type: boolean) - Statistics: Num rows: 2000 Data size: 81260 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 90850 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: key - Statistics: Num rows: 2000 Data size: 81260 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 90850 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(key) mode: hash @@ -591,7 +591,7 @@ Partition Parameters: numFiles 2 numRows 1000 rawDataSize 176000 - totalSize 3008 + totalSize 3485 #### A masked pattern was here #### # Storage Information diff --git a/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid2.q.out b/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid2.q.out index 
20f88bdb70..5bc944271d 100644 --- a/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid2.q.out +++ b/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid2.q.out @@ -31,8 +31,10 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0, Stage-4 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -46,26 +48,62 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col2 (type: string), _col3 (type: string), _bucket_number (type: string), _col1 (type: string) - sort order: ++++ - Map-reduce partition columns: _col2 (type: string), _col3 (type: string) + key expressions: _col1 (type: string) + sort order: + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + value expressions: _col0 (type: string), _col2 (type: string), _col3 (type: string) Execution mode: vectorized Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._bucket_number (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _bucket_number + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Dp Sort State: PARTITION_BUCKET_SORTED Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.non_acid + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), UDFToInteger(_col3) (type: int) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: int) + mode: hash + 
outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -80,7 +118,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.non_acid - Stage: Stage-2 + Stage: Stage-3 Stats Work Basic Stats Work: Column Stats Desc: @@ -88,3 +126,33 @@ STAGE PLANS: Column Types: string, string Table: default.non_acid + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_16.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_16.q.out index 0414812ca9..1d629abc21 100644 --- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_16.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_16.q.out @@ -223,16 +223,16 @@ STAGE PLANS: TableScan alias: bucket_small_n17 filterExpr: pri BETWEEN 1 AND 2 (type: boolean) - Statistics: Num rows: 236 Data size: 23364 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 236 Data size: 45636 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: bigint), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 236 Data size: 23364 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 236 Data size: 45636 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 236 Data size: 23364 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 236 Data size: 45636 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs @@ -246,14 +246,14 @@ STAGE PLANS: 0 _col0 (type: bigint) 1 _col0 (type: bigint) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 20 Data size: 3436 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: 
Num rows: 259 Data size: 50199 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint), _col1 (type: string), _col3 (type: string), 'day1' (type: string), 1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 20 Data size: 5276 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 259 Data size: 50199 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 20 Data size: 5276 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 259 Data size: 50199 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -371,11 +371,11 @@ STAGE PLANS: TableScan alias: bucket_small_n17 filterExpr: pri BETWEEN 1 AND 2 (type: boolean) - Statistics: Num rows: 236 Data size: 23364 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 236 Data size: 45636 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: bigint), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 236 Data size: 23364 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 236 Data size: 45636 Basic stats: COMPLETE Column stats: NONE Map Operator Tree: TableScan alias: bucket_big_n17 @@ -392,14 +392,14 @@ STAGE PLANS: 0 _col0 (type: bigint) 1 _col0 (type: bigint) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 20 Data size: 3436 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 259 Data size: 50199 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint), _col1 (type: string), _col3 (type: string), 'day1' (type: string), 1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 20 Data size: 5276 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 259 Data size: 50199 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 20 Data size: 5276 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 259 Data size: 50199 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -446,30 +446,11 @@ POSTHOOK: Input: default@bucket_small_n17 POSTHOOK: Input: default@bucket_small_n17@pri=1 POSTHOOK: Input: default@bucket_small_n17@pri=2 #### A masked pattern was here #### -0 val_0 val_0 day1 1 -0 val_0 val_0 day1 1 -0 val_0 val_0 day1 1 -0 val_0 val_0 day1 1 -0 val_0 val_0 day1 1 -0 val_0 val_0 day1 1 -103 val_103 val_103 day1 1 -103 val_103 val_103 day1 1 -103 val_103 val_103 day1 1 -103 val_103 val_103 day1 1 -169 val_169 val_169 day1 1 -169 val_169 val_169 day1 1 -169 val_169 val_169 day1 1 -169 val_169 val_169 day1 1 -169 val_169 val_169 day1 1 -169 val_169 val_169 day1 1 -169 val_169 val_169 day1 1 -169 val_169 val_169 day1 1 -172 val_172 val_172 day1 1 -172 val_172 val_172 day1 1 -172 val_172 val_172 day1 1 -172 val_172 val_172 day1 1 -374 val_374 val_374 day1 1 -374 val_374 val_374 day1 1 +0 val_0 NULL day1 1 +103 val_103 NULL day1 1 +169 val_169 NULL day1 1 +172 val_172 NULL day1 1 +374 val_374 NULL day1 1 PREHOOK: query: drop table bucket_big_n17 PREHOOK: type: DROPTABLE PREHOOK: Input: default@bucket_big_n17 @@ -627,11 +608,11 @@ STAGE PLANS: TableScan alias: bucket_small_n17 
filterExpr: pri BETWEEN 1 AND 2 (type: boolean) - Statistics: Num rows: 236 Data size: 23364 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 236 Data size: 45636 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: bigint), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 236 Data size: 23364 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 236 Data size: 45636 Basic stats: COMPLETE Column stats: NONE Map Operator Tree: TableScan alias: bucket_big_n17 @@ -648,14 +629,14 @@ STAGE PLANS: 0 _col0 (type: bigint) 1 _col0 (type: bigint) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 20 Data size: 3436 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 259 Data size: 50199 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint), _col1 (type: string), _col3 (type: string), 'day1' (type: string), 1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 20 Data size: 5276 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 259 Data size: 50199 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 20 Data size: 5276 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 259 Data size: 50199 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -702,27 +683,8 @@ POSTHOOK: Input: default@bucket_small_n17 POSTHOOK: Input: default@bucket_small_n17@pri=1 POSTHOOK: Input: default@bucket_small_n17@pri=2 #### A masked pattern was here #### -0 val_0 val_0 day1 1 -0 val_0 val_0 day1 1 -0 val_0 val_0 day1 1 -0 val_0 val_0 day1 1 -0 val_0 val_0 day1 1 -0 val_0 val_0 day1 1 -103 val_103 val_103 day1 1 -103 val_103 val_103 day1 1 -103 val_103 val_103 day1 1 -103 val_103 val_103 day1 1 -169 val_169 val_169 day1 1 -169 val_169 val_169 day1 1 -169 val_169 val_169 day1 1 -169 val_169 val_169 day1 1 -169 val_169 val_169 day1 1 -169 val_169 val_169 day1 1 -169 val_169 val_169 day1 1 -169 val_169 val_169 day1 1 -172 val_172 val_172 day1 1 -172 val_172 val_172 day1 1 -172 val_172 val_172 day1 1 -172 val_172 val_172 day1 1 -374 val_374 val_374 day1 1 -374 val_374 val_374 day1 1 +0 val_0 NULL day1 1 +103 val_103 NULL day1 1 +169 val_169 NULL day1 1 +172 val_172 NULL day1 1 +374 val_374 NULL day1 1 diff --git a/ql/src/test/results/clientpositive/llap/delete_all_partitioned.q.out b/ql/src/test/results/clientpositive/llap/delete_all_partitioned.q.out index 4c1a024683..a691f35362 100644 --- a/ql/src/test/results/clientpositive/llap/delete_all_partitioned.q.out +++ b/ql/src/test/results/clientpositive/llap/delete_all_partitioned.q.out @@ -84,3 +84,11 @@ POSTHOOK: Input: default@acid_dap POSTHOOK: Input: default@acid_dap@ds=today POSTHOOK: Input: default@acid_dap@ds=tomorrow #### A masked pattern was here #### +-1069736047 k17Am8uPHWk02cEf1jet today +-1070551679 iUR3Q today +-1070883071 0ruyd6Y50JpdGRf6HqD today +-1071363017 Anj0oF today +-1072076362 2uLyD28144vklju213J1mr today +-1072081801 dPkN74F7 today +-1072910839 0iqrc5 today +-1073279343 oj1YrV5Wa today diff --git a/ql/src/test/results/clientpositive/llap/dynpart_sort_opt_vectorization.q.out b/ql/src/test/results/clientpositive/llap/dynpart_sort_opt_vectorization.q.out index 62fdbdba42..b20593848a 100644 --- 
diff --git a/ql/src/test/results/clientpositive/llap/dynpart_sort_opt_vectorization.q.out b/ql/src/test/results/clientpositive/llap/dynpart_sort_opt_vectorization.q.out
index 62fdbdba42..b20593848a 100644
--- a/ql/src/test/results/clientpositive/llap/dynpart_sort_opt_vectorization.q.out
+++ b/ql/src/test/results/clientpositive/llap/dynpart_sort_opt_vectorization.q.out
@@ -189,7 +189,6 @@ STAGE PLANS:
                        key expressions: _col4 (type: tinyint), _col0 (type: smallint)
                        sort order: ++
                        Map-reduce partition columns: _col4 (type: tinyint)
-                       Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
                        value expressions: _col1 (type: int), _col2 (type: bigint), _col3 (type: float)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
@@ -199,7 +198,6 @@ STAGE PLANS:
              Select Operator
                expressions: KEY._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint)
                outputColumnNames: _col0, _col1, _col2, _col3, _col4
-               Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
                  Dp Sort State: PARTITION_SORTED
@@ -275,7 +273,6 @@ STAGE PLANS:
                      Reduce Output Operator
                        sort order:
                        Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
-                       TopN Hash Memory Usage: 0.1
                        value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
@@ -293,7 +290,6 @@ STAGE PLANS:
                  key expressions: _col4 (type: tinyint)
                  sort order: +
                  Map-reduce partition columns: _col4 (type: tinyint)
-                 Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
                  value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float)
        Reducer 3
            Execution mode: vectorized, llap
@@ -301,7 +297,6 @@ STAGE PLANS:
              Select Operator
                expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint)
                outputColumnNames: _col0, _col1, _col2, _col3, _col4
-               Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
                  Dp Sort State: PARTITION_SORTED
@@ -374,7 +369,6 @@ STAGE PLANS:
                        key expressions: _col4 (type: tinyint), _bucket_number (type: string)
                        sort order: ++
                        Map-reduce partition columns: _col4 (type: tinyint)
-                       Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
                        value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
@@ -384,11 +378,10 @@ STAGE PLANS:
              Select Operator
                expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint), KEY._bucket_number (type: string)
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _bucket_number
-               Statistics: Num rows: 11 Data size: 2288 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
                  Dp Sort State: PARTITION_BUCKET_SORTED
-                 Statistics: Num rows: 11 Data size: 2288 Basic stats: COMPLETE Column stats: COMPLETE
+                 Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                      output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -456,7 +449,6 @@ STAGE PLANS:
                        key expressions: _col4 (type: tinyint), _bucket_number (type: string), _col3 (type: float)
                        sort order: +++
                        Map-reduce partition columns: _col4 (type: tinyint)
-                       Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
                        value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
@@ -466,11 +458,10 @@ STAGE PLANS:
              Select Operator
                expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), KEY._col3 (type: float), KEY._col4 (type: tinyint), KEY._bucket_number (type: string)
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _bucket_number
-               Statistics: Num rows: 11 Data size: 2288 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
                  Dp Sort State: PARTITION_BUCKET_SORTED
-                 Statistics: Num rows: 11 Data size: 2288 Basic stats: COMPLETE Column stats: COMPLETE
+                 Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                      output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -606,7 +597,6 @@ STAGE PLANS:
                        key expressions: _col4 (type: tinyint), _col0 (type: smallint)
                        sort order: ++
                        Map-reduce partition columns: _col4 (type: tinyint)
-                       Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
                        value expressions: _col1 (type: int), _col2 (type: bigint), _col3 (type: float)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
@@ -616,7 +606,6 @@ STAGE PLANS:
              Select Operator
                expressions: KEY._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint)
                outputColumnNames: _col0, _col1, _col2, _col3, _col4
-               Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
                  Dp Sort State: PARTITION_SORTED
@@ -692,7 +681,6 @@ STAGE PLANS:
                      Reduce Output Operator
                        sort order:
                        Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
-                       TopN Hash Memory Usage: 0.1
                        value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
@@ -710,7 +698,6 @@ STAGE PLANS:
                  key expressions: _col4 (type: tinyint)
                  sort order: +
                  Map-reduce partition columns: _col4 (type: tinyint)
-                 Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
                  value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float)
        Reducer 3
            Execution mode: vectorized, llap
@@ -718,7 +705,6 @@ STAGE PLANS:
              Select Operator
                expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint)
                outputColumnNames: _col0, _col1, _col2, _col3, _col4
-               Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
                  Dp Sort State: PARTITION_SORTED
@@ -791,7 +777,6 @@ STAGE PLANS:
                        key expressions: _col4 (type: tinyint), _bucket_number (type: string)
                        sort order: ++
                        Map-reduce partition columns: _col4 (type: tinyint)
-                       Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
                        value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
@@ -801,11 +786,10 @@ STAGE PLANS:
              Select Operator
                expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint), KEY._bucket_number (type: string)
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _bucket_number
-               Statistics: Num rows: 11 Data size: 2288 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
                  Dp Sort State: PARTITION_BUCKET_SORTED
-                 Statistics: Num rows: 11 Data size: 2288 Basic stats: COMPLETE Column stats: COMPLETE
+                 Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                      output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -873,7 +857,6 @@ STAGE PLANS:
                        key expressions: _col4 (type: tinyint), _bucket_number (type: string), _col3 (type: float)
                        sort order: +++
                        Map-reduce partition columns: _col4 (type: tinyint)
-                       Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
                        value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
@@ -883,11 +866,10 @@ STAGE PLANS:
              Select Operator
                expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), KEY._col3 (type: float), KEY._col4 (type: tinyint), KEY._bucket_number (type: string)
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _bucket_number
-               Statistics: Num rows: 11 Data size: 2288 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
                  Dp Sort State: PARTITION_BUCKET_SORTED
-                 Statistics: Num rows: 11 Data size: 2288 Basic stats: COMPLETE Column stats: COMPLETE
+                 Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                      output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -1171,7 +1153,7 @@ Partition Parameters:
 numFiles 8
 numRows 32
 rawDataSize 640
-totalSize 4648
+totalSize 4670
#### A masked pattern was here ####

 # Storage Information
@@ -1249,7 +1231,7 @@ Partition Parameters:
 numFiles 8
 numRows 32
 rawDataSize 640
-totalSize 4658
+totalSize 4656
#### A masked pattern was here ####

 # Storage Information
@@ -1390,7 +1372,6 @@ STAGE PLANS:
#### A masked pattern was here ####
      Edges:
        Reducer 2 <- Map 1 (SIMPLE_EDGE)
-       Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
      Vertices:
        Map 1
@@ -1407,63 +1388,27 @@ STAGE PLANS:
                      outputColumnNames: _col0, _col1, _col2, _col3, _col4
                      Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
-                       key expressions: _col1 (type: int)
-                       sort order: +
-                       Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
-                       value expressions: _col0 (type: smallint), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint)
+                       key expressions: _col4 (type: tinyint), _col1 (type: int)
+                       sort order: ++
+                       Map-reduce partition columns: _col4 (type: tinyint)
+                       value expressions: _col0 (type: smallint), _col2 (type: bigint), _col3 (type: float)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
        Reducer 2
-           Execution mode: llap
+           Execution mode: vectorized, llap
            Reduce Operator Tree:
              Select Operator
-               expressions: VALUE._col0 (type: smallint), KEY.reducesinkkey0 (type: int), VALUE._col1 (type: bigint), VALUE._col2 (type: float), VALUE._col3 (type: tinyint)
+               expressions: VALUE._col0 (type: smallint), KEY._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint)
                outputColumnNames: _col0, _col1, _col2, _col3, _col4
-               Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
+                 Dp Sort State: PARTITION_SORTED
                  Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.TextInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                      name: default.over1k_part2_orc
-               Select Operator
-                 expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), 'foo' (type: string), _col4 (type: tinyint)
-                 outputColumnNames: si, i, b, f, ds, t
-                 Statistics: Num rows: 11 Data size: 1221 Basic stats: COMPLETE Column stats: COMPLETE
-                 Group By Operator
-                   aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll')
-                   keys: ds (type: string), t (type: tinyint)
-                   mode: hash
-                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-                   Statistics: Num rows: 5 Data size: 8935 Basic stats: COMPLETE Column stats: COMPLETE
-                   Reduce Output Operator
-                     key expressions: _col0 (type: string), _col1 (type: tinyint)
-                     sort order: ++
-                     Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint)
-                     Statistics: Num rows: 5 Data size: 8935 Basic stats: COMPLETE Column stats: COMPLETE
-                     value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct)
-       Reducer 3
-           Execution mode: llap
-           Reduce Operator Tree:
-             Group By Operator
-               aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
-               keys: KEY._col0 (type: string), KEY._col1 (type: tinyint)
-               mode: mergepartial
-               outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-               Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE
-               Select Operator
-                 expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint)
-                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-                 Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE
-                 File Output Operator
-                   compressed: false
-                   Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE
-                   table:
-                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

    Stage: Stage-2
      Dependency Collection
@@ -1527,7 +1472,6 @@ STAGE PLANS:
                        key expressions: _col4 (type: tinyint), _col1 (type: int)
                        sort order: ++
                        Map-reduce partition columns: _col4 (type: tinyint)
-                       Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
                        value expressions: _col0 (type: smallint), _col2 (type: bigint), _col3 (type: float)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
@@ -1537,7 +1481,6 @@ STAGE PLANS:
              Select Operator
                expressions: VALUE._col0 (type: smallint), KEY._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint)
                outputColumnNames: _col0, _col1, _col2, _col3, _col4
-               Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
                  Dp Sort State: PARTITION_SORTED
@@ -1607,7 +1550,6 @@ STAGE PLANS:
                        key expressions: _col2 (type: int)
                        sort order: +
                        Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE
-                       TopN Hash Memory Usage: 0.1
                        value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
@@ -1632,7 +1574,6 @@ STAGE PLANS:
                  key expressions: _col4 (type: tinyint)
                  sort order: +
                  Map-reduce partition columns: _col4 (type: tinyint)
-                 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                  value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float)
        Reducer 3
            Execution mode: vectorized, llap
@@ -1640,7 +1581,6 @@ STAGE PLANS:
              Select Operator
                expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint)
                outputColumnNames: _col0, _col1, _col2, _col3, _col4
-               Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
                  Dp Sort State: PARTITION_SORTED
@@ -1694,7 +1634,6 @@ STAGE PLANS:
#### A masked pattern was here ####
      Edges:
        Reducer 2 <- Map 1 (SIMPLE_EDGE)
-       Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
      Vertices:
        Map 1
@@ -1714,7 +1653,7 @@ STAGE PLANS:
                    Reduce Output Operator
                      key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float)
                      sort order: +++++
-                     Map-reduce partition columns: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float)
+                     Map-reduce partition columns: _col0 (type: tinyint)
                      Statistics: Num rows: 5 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
@@ -1730,50 +1669,21 @@ STAGE PLANS:
                expressions: _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col0 (type: tinyint)
                outputColumnNames: _col0, _col1, _col2, _col3, _col4
                Statistics: Num rows: 5 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE
-               File Output Operator
-                 compressed: false
-                 Statistics: Num rows: 5 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE
-                 table:
-                     input format: org.apache.hadoop.mapred.TextInputFormat
-                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                     name: default.over1k_part2_orc
                Select Operator
-                 expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), 'foo' (type: string), _col4 (type: tinyint)
-                 outputColumnNames: si, i, b, f, ds, t
-                 Statistics: Num rows: 5 Data size: 555 Basic stats: COMPLETE Column stats: COMPLETE
-                 Group By Operator
-                   aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll')
-                   keys: ds (type: string), t (type: tinyint)
-                   mode: hash
-                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-                   Statistics: Num rows: 2 Data size: 3574 Basic stats: COMPLETE Column stats: COMPLETE
-                   Reduce Output Operator
-                     key expressions: _col0 (type: string), _col1 (type: tinyint)
-                     sort order: ++
-                     Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint)
-                     Statistics: Num rows: 2 Data size: 3574 Basic stats: COMPLETE Column stats: COMPLETE
-                     value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct)
-       Reducer 3
-           Execution mode: llap
-           Reduce Operator Tree:
-             Group By Operator
-               aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
-               keys: KEY._col0 (type: string), KEY._col1 (type: tinyint)
-               mode: mergepartial
-               outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-               Statistics: Num rows: 2 Data size: 3702 Basic stats: COMPLETE Column stats: COMPLETE
-               Select Operator
-                 expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint)
-                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-                 Statistics: Num rows: 2 Data size: 3702 Basic stats: COMPLETE Column stats: COMPLETE
-                 File Output Operator
-                   compressed: false
-                   Statistics: Num rows: 2 Data size: 3702 Basic stats: COMPLETE Column stats: COMPLETE
-                   table:
-                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                 expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float)
+                 outputColumnNames: _col0, _col1, _col2, _col3
+                 Select Operator
+                   expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint)
+                   outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                   File Output Operator
+                     compressed: false
+                     Dp Sort State: PARTITION_SORTED
+                     Statistics: Num rows: 5 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE
+                     table:
+                       input format: org.apache.hadoop.mapred.TextInputFormat
+                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                       name: default.over1k_part2_orc

    Stage: Stage-2
      Dependency Collection
@@ -1842,7 +1752,7 @@ STAGE PLANS:
            Execution mode: vectorized, llap
            LLAP IO: all inputs
        Reducer 2
-           Execution mode: vectorized, llap
+           Execution mode: llap
            Reduce Operator Tree:
              Group By Operator
                keys: KEY._col0 (type: tinyint), KEY._col1 (type: smallint), KEY._col2 (type: int), KEY._col3 (type: bigint), KEY._col4 (type: float)
@@ -1853,15 +1763,21 @@ STAGE PLANS:
                expressions: _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col0 (type: tinyint)
                outputColumnNames: _col0, _col1, _col2, _col3, _col4
                Statistics: Num rows: 5 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE
-               File Output Operator
-                 compressed: false
-                 Dp Sort State: PARTITION_SORTED
-                 Statistics: Num rows: 5 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE
-                 table:
-                     input format: org.apache.hadoop.mapred.TextInputFormat
-                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                     name: default.over1k_part2_orc
+               Select Operator
+                 expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float)
+                 outputColumnNames: _col0, _col1, _col2, _col3
+                 Select Operator
+                   expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint)
+                   outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                   File Output Operator
+                     compressed: false
+                     Dp Sort State: PARTITION_SORTED
+                     Statistics: Num rows: 5 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE
+                     table:
+                       input format: org.apache.hadoop.mapred.TextInputFormat
+                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                       name: default.over1k_part2_orc

    Stage: Stage-2
      Dependency Collection
@@ -2210,7 +2126,6 @@ STAGE PLANS:
#### A masked pattern was here ####
      Edges:
        Reducer 2 <- Map 1 (SIMPLE_EDGE)
-       Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
      Vertices:
        Map 1
@@ -2227,64 +2142,27 @@ STAGE PLANS:
                      outputColumnNames: _col0, _col1, _col2, _col3, _col4
                      Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
-                       key expressions: _col3 (type: float)
-                       sort order: +
-                       Map-reduce partition columns: _col0 (type: smallint)
-                       Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
-                       value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col4 (type: tinyint)
+                       key expressions: _col4 (type: tinyint), _bucket_number (type: string), _col3 (type: float)
+                       sort order: +++
+                       Map-reduce partition columns: _col4 (type: tinyint)
+                       value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
        Reducer 2
-           Execution mode: llap
+           Execution mode: vectorized, llap
            Reduce Operator Tree:
              Select Operator
-               expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), KEY.reducesinkkey0 (type: float), VALUE._col3 (type: tinyint)
-               outputColumnNames: _col0, _col1, _col2, _col3, _col4
-               Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
+               expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), KEY._col3 (type: float), KEY._col4 (type: tinyint), KEY._bucket_number (type: string)
+               outputColumnNames: _col0, _col1, _col2, _col3, _col4, _bucket_number
                File Output Operator
                  compressed: false
+                 Dp Sort State: PARTITION_BUCKET_SORTED
                  Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.TextInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                      name: default.over1k_part_buck_sort2_orc
-               Select Operator
-                 expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint)
-                 outputColumnNames: si, i, b, f, t
-                 Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
-                 Group By Operator
-                   aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll')
-                   keys: t (type: tinyint)
-                   mode: hash
-                   outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                   Statistics: Num rows: 5 Data size: 8500 Basic stats: COMPLETE Column stats: COMPLETE
-                   Reduce Output Operator
-                     key expressions: _col0 (type: tinyint)
-                     sort order: +
-                     Map-reduce partition columns: _col0 (type: tinyint)
-                     Statistics: Num rows: 5 Data size: 8500 Basic stats: COMPLETE Column stats: COMPLETE
-                     value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct)
-       Reducer 3
-           Execution mode: llap
-           Reduce Operator Tree:
-             Group By Operator
-               aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
-               keys: KEY._col0 (type: tinyint)
-               mode: mergepartial
-               outputColumnNames: _col0, _col1, _col2, _col3, _col4
-               Statistics: Num rows: 5 Data size: 8820 Basic stats: COMPLETE Column stats: COMPLETE
-               Select Operator
-                 expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col0 (type: tinyint)
-                 outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                 Statistics: Num rows: 5 Data size: 8820 Basic stats: COMPLETE Column stats: COMPLETE
-                 File Output Operator
-                   compressed: false
-                   Statistics: Num rows: 5 Data size: 8820 Basic stats: COMPLETE Column stats: COMPLETE
-                   table:
-                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

    Stage: Stage-2
      Dependency Collection
@@ -2347,7 +2225,6 @@ STAGE PLANS:
                        key expressions: _col4 (type: tinyint), _bucket_number (type: string), _col3 (type: float)
                        sort order: +++
                        Map-reduce partition columns: _col4 (type: tinyint)
-                       Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
                        value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
@@ -2357,11 +2234,10 @@ STAGE PLANS:
              Select Operator
                expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), KEY._col3 (type: float), KEY._col4 (type: tinyint), KEY._bucket_number (type: string)
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _bucket_number
-               Statistics: Num rows: 11 Data size: 2288 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
                  Dp Sort State: PARTITION_BUCKET_SORTED
-                 Statistics: Num rows: 11 Data size: 2288 Basic stats: COMPLETE Column stats: COMPLETE
+                 Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.TextInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -2906,7 +2782,6 @@ STAGE PLANS:
                      Reduce Output Operator
                        sort order:
                        Statistics: Num rows: 10 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE
-                       TopN Hash Memory Usage: 0.1
                        value expressions: _col0 (type: int), _col1 (type: smallint), _col2 (type: string)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
@@ -2924,7 +2799,6 @@ STAGE PLANS:
                  key expressions: _col2 (type: string), _bucket_number (type: string)
                  sort order: ++
                  Map-reduce partition columns: _col2 (type: string)
-                 Statistics: Num rows: 10 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE
                  value expressions: _col0 (type: int), _col1 (type: smallint)
        Reducer 3
            Execution mode: vectorized, llap
@@ -2932,11 +2806,10 @@ STAGE PLANS:
              Select Operator
                expressions: VALUE._col0 (type: int), VALUE._col1 (type: smallint), KEY._col2 (type: string), KEY._bucket_number (type: string)
                outputColumnNames: _col0, _col1, _col2, _bucket_number
-               Statistics: Num rows: 10 Data size: 2656 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
                  Dp Sort State: PARTITION_BUCKET_SORTED
-                 Statistics: Num rows: 10 Data size: 2656 Basic stats: COMPLETE Column stats: COMPLETE
+                 Statistics: Num rows: 10 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                      output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
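(Editorial aside, not part of the patch: the PARTITION_BUCKET_SORTED hunks above key the reduce sink on the partition column, the synthetic _bucket_number string and, for sorted tables, the sort column. A hedged sketch of a target table of that shape; part_buck_tab and src_tab are assumed names, and the DDL is ordinary HiveQL rather than anything taken from this patch.)

    -- minimal sketch, assuming illustrative names part_buck_tab/src_tab
    create table part_buck_tab (si smallint, i int, b bigint, f float)
      partitioned by (t tinyint)
      clustered by (si) sorted by (f) into 4 buckets
      stored as orc;
    explain
    insert overwrite table part_buck_tab partition (t)
    select si, i, b, f, t from src_tab;
    -- the reduce sink then keys on (_col4, _bucket_number, _col3), i.e.
    -- (partition column, bucket number, sort column), matching the
    -- "key expressions" lines in the hunks above.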
diff --git a/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization.q.out b/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization.q.out
index ec95389d08..4101783687 100644
--- a/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization.q.out
+++ b/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization.q.out
@@ -146,7 +146,6 @@ STAGE PLANS:
                        key expressions: _col4 (type: tinyint)
                        sort order: +
                        Map-reduce partition columns: _col4 (type: tinyint)
-                       Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                        value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float)
            Execution mode: llap
            LLAP IO: no inputs
@@ -156,7 +155,6 @@ STAGE PLANS:
              Select Operator
                expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint)
                outputColumnNames: _col0, _col1, _col2, _col3, _col4
-               Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
                  Dp Sort State: PARTITION_SORTED
@@ -232,7 +230,6 @@ STAGE PLANS:
                      Reduce Output Operator
                        sort order:
                        Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
-                       TopN Hash Memory Usage: 0.1
                        value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint)
            Execution mode: llap
            LLAP IO: no inputs
@@ -250,7 +247,6 @@ STAGE PLANS:
                  key expressions: _col4 (type: tinyint)
                  sort order: +
                  Map-reduce partition columns: _col4 (type: tinyint)
-                 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                  value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float)
        Reducer 3
            Execution mode: llap
@@ -258,7 +254,6 @@ STAGE PLANS:
              Select Operator
                expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint)
                outputColumnNames: _col0, _col1, _col2, _col3, _col4
-               Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
                  Dp Sort State: PARTITION_SORTED
@@ -331,7 +326,6 @@ STAGE PLANS:
                        key expressions: _col4 (type: tinyint), _bucket_number (type: string)
                        sort order: ++
                        Map-reduce partition columns: _col4 (type: tinyint)
-                       Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                        value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float)
            Execution mode: llap
            LLAP IO: no inputs
@@ -341,7 +335,6 @@ STAGE PLANS:
              Select Operator
                expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint), KEY._bucket_number (type: string)
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _bucket_number
-               Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
                  Dp Sort State: PARTITION_BUCKET_SORTED
@@ -413,7 +406,6 @@ STAGE PLANS:
                        key expressions: _col4 (type: tinyint), _bucket_number (type: string), _col3 (type: float)
                        sort order: +++
                        Map-reduce partition columns: _col4 (type: tinyint)
-                       Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                        value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint)
            Execution mode: llap
            LLAP IO: no inputs
@@ -423,7 +415,6 @@ STAGE PLANS:
              Select Operator
                expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), KEY._col3 (type: float), KEY._col4 (type: tinyint), KEY._bucket_number (type: string)
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _bucket_number
-               Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
                  Dp Sort State: PARTITION_BUCKET_SORTED
@@ -563,7 +554,6 @@ STAGE PLANS:
                        key expressions: _col4 (type: tinyint)
                        sort order: +
                        Map-reduce partition columns: _col4 (type: tinyint)
-                       Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                        value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float)
            Execution mode: llap
            LLAP IO: no inputs
@@ -573,7 +563,6 @@ STAGE PLANS:
              Select Operator
                expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint)
                outputColumnNames: _col0, _col1, _col2, _col3, _col4
-               Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
                  Dp Sort State: PARTITION_SORTED
@@ -649,7 +638,6 @@ STAGE PLANS:
                      Reduce Output Operator
                        sort order:
                        Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
-                       TopN Hash Memory Usage: 0.1
                        value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint)
            Execution mode: llap
            LLAP IO: no inputs
@@ -667,7 +655,6 @@ STAGE PLANS:
                  key expressions: _col4 (type: tinyint)
                  sort order: +
                  Map-reduce partition columns: _col4 (type: tinyint)
-                 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                  value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float)
        Reducer 3
            Execution mode: llap
@@ -675,7 +662,6 @@ STAGE PLANS:
              Select Operator
                expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint)
                outputColumnNames: _col0, _col1, _col2, _col3, _col4
-               Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
                  Dp Sort State: PARTITION_SORTED
@@ -748,7 +734,6 @@ STAGE PLANS:
                        key expressions: _col4 (type: tinyint), _bucket_number (type: string)
                        sort order: ++
                        Map-reduce partition columns: _col4 (type: tinyint)
-                       Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                        value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float)
            Execution mode: llap
            LLAP IO: no inputs
@@ -758,7 +743,6 @@ STAGE PLANS:
              Select Operator
                expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint), KEY._bucket_number (type: string)
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _bucket_number
-               Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
                  Dp Sort State: PARTITION_BUCKET_SORTED
@@ -830,7 +814,6 @@ STAGE PLANS:
                        key expressions: _col4 (type: tinyint), _bucket_number (type: string), _col3 (type: float)
                        sort order: +++
                        Map-reduce partition columns: _col4 (type: tinyint)
-                       Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                        value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint)
            Execution mode: llap
            LLAP IO: no inputs
@@ -840,7 +823,6 @@ STAGE PLANS:
              Select Operator
                expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), KEY._col3 (type: float), KEY._col4 (type: tinyint), KEY._bucket_number (type: string)
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _bucket_number
-               Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
                  Dp Sort State: PARTITION_BUCKET_SORTED
@@ -1347,7 +1329,6 @@ STAGE PLANS:
#### A masked pattern was here ####
      Edges:
        Reducer 2 <- Map 1 (SIMPLE_EDGE)
-       Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
      Vertices:
        Map 1
@@ -1364,63 +1345,27 @@ STAGE PLANS:
                      outputColumnNames: _col0, _col1, _col2, _col3, _col4
                      Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                      Reduce Output Operator
-                       key expressions: _col1 (type: int)
-                       sort order: +
-                       Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
-                       value expressions: _col0 (type: smallint), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint)
+                       key expressions: _col4 (type: tinyint), _col1 (type: int)
+                       sort order: ++
+                       Map-reduce partition columns: _col4 (type: tinyint)
+                       value expressions: _col0 (type: smallint), _col2 (type: bigint), _col3 (type: float)
            Execution mode: llap
            LLAP IO: no inputs
        Reducer 2
            Execution mode: llap
            Reduce Operator Tree:
              Select Operator
-               expressions: VALUE._col0 (type: smallint), KEY.reducesinkkey0 (type: int), VALUE._col1 (type: bigint), VALUE._col2 (type: float), VALUE._col3 (type: tinyint)
+               expressions: VALUE._col0 (type: smallint), KEY._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint)
                outputColumnNames: _col0, _col1, _col2, _col3, _col4
-               Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
+                 Dp Sort State: PARTITION_SORTED
                  Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.TextInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                      name: default.over1k_part2
-               Select Operator
-                 expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), 'foo' (type: string), _col4 (type: tinyint)
-                 outputColumnNames: si, i, b, f, ds, t
-                 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
-                 Group By Operator
-                   aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll')
-                   keys: ds (type: string), t (type: tinyint)
-                   mode: hash
-                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-                   Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
-                   Reduce Output Operator
-                     key expressions: _col0 (type: string), _col1 (type: tinyint)
-                     sort order: ++
-                     Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint)
-                     Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
-                     value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct)
-       Reducer 3
-           Execution mode: llap
-           Reduce Operator Tree:
-             Group By Operator
-               aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
-               keys: KEY._col0 (type: string), KEY._col1 (type: tinyint)
-               mode: mergepartial
-               outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-               Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
-               Select Operator
-                 expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint)
-                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-                 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
-                 File Output Operator
-                   compressed: false
-                   Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
-                   table:
-                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

    Stage: Stage-2
      Dependency Collection
@@ -1484,7 +1429,6 @@ STAGE PLANS:
                        key expressions: _col4 (type: tinyint), _col1 (type: int)
                        sort order: ++
                        Map-reduce partition columns: _col4 (type: tinyint)
-                       Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                        value expressions: _col0 (type: smallint), _col2 (type: bigint), _col3 (type: float)
            Execution mode: llap
            LLAP IO: no inputs
@@ -1494,7 +1438,6 @@ STAGE PLANS:
              Select Operator
                expressions: VALUE._col0 (type: smallint), KEY._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint)
                outputColumnNames: _col0, _col1, _col2, _col3, _col4
-               Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
                  Dp Sort State: PARTITION_SORTED
@@ -1564,7 +1507,6 @@ STAGE PLANS:
                        key expressions: _col2 (type: int)
                        sort order: +
                        Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
-                       TopN Hash Memory Usage: 0.1
                        value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float)
            Execution mode: llap
            LLAP IO: no inputs
@@ -1589,7 +1531,6 @@ STAGE PLANS:
                  key expressions: _col4 (type: tinyint)
                  sort order: +
                  Map-reduce partition columns: _col4 (type: tinyint)
-                 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                  value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float)
        Reducer 3
            Execution mode: llap
@@ -1597,7 +1538,6 @@ STAGE PLANS:
              Select Operator
                expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint)
                outputColumnNames: _col0, _col1, _col2, _col3, _col4
-               Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
                  Dp Sort State: PARTITION_SORTED
@@ -1651,7 +1591,6 @@ STAGE PLANS:
#### A masked pattern was here ####
      Edges:
        Reducer 2 <- Map 1 (SIMPLE_EDGE)
-       Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
      Vertices:
        Map 1
@@ -1671,7 +1610,7 @@ STAGE PLANS:
                    Reduce Output Operator
                      key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float)
                      sort order: +++++
-                     Map-reduce partition columns: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float)
+                     Map-reduce partition columns: _col0 (type: tinyint)
                      Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
            Execution mode: llap
            LLAP IO: no inputs
@@ -1687,50 +1626,21 @@ STAGE PLANS:
                expressions: _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col0 (type: tinyint)
                outputColumnNames: _col0, _col1, _col2, _col3, _col4
                Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
-               File Output Operator
-                 compressed: false
-                 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
-                 table:
-                     input format: org.apache.hadoop.mapred.TextInputFormat
-                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                     name: default.over1k_part2
                Select Operator
-                 expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), 'foo' (type: string), _col4 (type: tinyint)
-                 outputColumnNames: si, i, b, f, ds, t
-                 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
-                 Group By Operator
-                   aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll')
-                   keys: ds (type: string), t (type: tinyint)
-                   mode: hash
-                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-                   Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
-                   Reduce Output Operator
-                     key expressions: _col0 (type: string), _col1 (type: tinyint)
-                     sort order: ++
-                     Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint)
+                 expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float)
+                 outputColumnNames: _col0, _col1, _col2, _col3
+                 Select Operator
+                   expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint)
+                   outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                   File Output Operator
+                     compressed: false
+                     Dp Sort State: PARTITION_SORTED
                      Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
-                     value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct)
-       Reducer 3
-           Execution mode: llap
-           Reduce Operator Tree:
-             Group By Operator
-               aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
-               keys: KEY._col0 (type: string), KEY._col1 (type: tinyint)
-               mode: mergepartial
-               outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-               Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
-               Select Operator
-                 expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint)
-                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-                 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
-                 File Output Operator
-                   compressed: false
-                   Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
-                   table:
-                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                     table:
+                       input format: org.apache.hadoop.mapred.TextInputFormat
+                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                       name: default.over1k_part2

    Stage: Stage-2
      Dependency Collection
@@ -1810,15 +1720,21 @@ STAGE PLANS:
                expressions: _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col0 (type: tinyint)
                outputColumnNames: _col0, _col1, _col2, _col3, _col4
                Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
-               File Output Operator
-                 compressed: false
-                 Dp Sort State: PARTITION_SORTED
-                 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
-                 table:
-                     input format: org.apache.hadoop.mapred.TextInputFormat
-                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                     name: default.over1k_part2
+               Select Operator
+                 expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float)
+                 outputColumnNames: _col0, _col1, _col2, _col3
+                 Select Operator
+                   expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint)
+                   outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                   File Output Operator
+                     compressed: false
+                     Dp Sort State: PARTITION_SORTED
+                     Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                     table:
+                       input format: org.apache.hadoop.mapred.TextInputFormat
+                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                       name: default.over1k_part2

    Stage: Stage-2
      Dependency Collection
@@ -2167,7 +2083,6 @@ STAGE PLANS:
#### A masked pattern was here ####
      Edges:
        Reducer 2 <- Map 1 (SIMPLE_EDGE)
-       Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
      Vertices:
        Map 1
@@ -2184,64 +2099,27 @@ STAGE PLANS:
                      outputColumnNames: _col0, _col1, _col2, _col3, _col4
                      Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                      Reduce Output Operator
-                       key expressions: _col3 (type: float)
-                       sort order: +
-                       Map-reduce partition columns: _col0 (type: smallint)
-                       Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
-                       value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col4 (type: tinyint)
+                       key expressions: _col4 (type: tinyint), _bucket_number (type: string), _col3 (type: float)
+                       sort order: +++
+                       Map-reduce partition columns: _col4 (type: tinyint)
+                       value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint)
            Execution mode: llap
            LLAP IO: no inputs
        Reducer 2
            Execution mode: llap
            Reduce Operator Tree:
              Select Operator
-               expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), KEY.reducesinkkey0 (type: float), VALUE._col3 (type: tinyint)
-               outputColumnNames: _col0, _col1, _col2, _col3, _col4
-               Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+               expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), KEY._col3 (type: float), KEY._col4 (type: tinyint), KEY._bucket_number (type: string)
+               outputColumnNames: _col0, _col1, _col2, _col3, _col4, _bucket_number
                File Output Operator
                  compressed: false
+                 Dp Sort State: PARTITION_BUCKET_SORTED
                  Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.TextInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                      name: default.over1k_part_buck_sort2
-               Select Operator
-                 expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint)
-                 outputColumnNames: si, i, b, f, t
-                 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
-                 Group By Operator
-                   aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll')
-                   keys: t (type: tinyint)
-                   mode: hash
-                   outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                   Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
-                   Reduce Output Operator
-                     key expressions: _col0 (type: tinyint)
-                     sort order: +
-                     Map-reduce partition columns: _col0 (type: tinyint)
-                     Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
-                     value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct)
-       Reducer 3
-           Execution mode: llap
-           Reduce Operator Tree:
-             Group By Operator
-               aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
-               keys: KEY._col0 (type: tinyint)
-               mode: mergepartial
-               outputColumnNames: _col0, _col1, _col2, _col3, _col4
-               Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
-               Select Operator
-                 expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col0 (type: tinyint)
-                 outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
-                 File Output Operator
-                   compressed: false
-                   Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
-                   table:
-                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

    Stage: Stage-2
      Dependency Collection
@@ -2304,7 +2182,6 @@ STAGE PLANS:
                        key expressions: _col4 (type: tinyint), _bucket_number (type: string), _col3 (type: float)
                        sort order: +++
                        Map-reduce partition columns: _col4 (type: tinyint)
-                       Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                        value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint)
            Execution mode: llap
            LLAP IO: no inputs
@@ -2314,7 +2191,6 @@ STAGE PLANS:
              Select Operator
                expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), KEY._col3 (type: float), KEY._col4 (type: tinyint), KEY._bucket_number (type: string)
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _bucket_number
-               Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
                  Dp Sort State: PARTITION_BUCKET_SORTED
@@ -2680,7 +2556,6 @@ STAGE PLANS:
                        key expressions: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int)
                        sort order: +++
                        Map-reduce partition columns: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int)
-                       Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE
                        value expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: float)
            Execution mode: llap
            LLAP IO: no inputs
@@ -2690,7 +2565,6 @@ STAGE PLANS:
              Select Operator
                expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: bigint), VALUE._col2 (type: float), KEY._col3 (type: string), KEY._col4 (type: tinyint), KEY._col5 (type: int)
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-               Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
                  Dp Sort State: PARTITION_SORTED
@@ -2764,7 +2638,6 @@ STAGE PLANS:
                        key expressions: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int)
                        sort order: +++
                        Map-reduce partition columns: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int)
-                       Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE
                        value expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: float)
            Execution mode: llap
            LLAP IO: no inputs
@@ -2774,7 +2647,6 @@ STAGE PLANS:
              Select Operator
                expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: bigint), VALUE._col2 (type: float), KEY._col3 (type: string), KEY._col4 (type: tinyint), KEY._col5 (type: int)
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-               Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
                  Dp Sort State: PARTITION_SORTED
@@ -2848,7 +2720,6 @@ STAGE PLANS:
                        key expressions: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int)
                        sort order: +++
                        Map-reduce partition columns: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int)
-                       Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE
                        value expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: float)
            Execution mode: llap
            LLAP IO: no inputs
@@ -2858,7 +2729,6 @@ STAGE PLANS:
              Select Operator
                expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: bigint), VALUE._col2 (type: float), KEY._col3 (type: string), KEY._col4 (type: tinyint), KEY._col5 (type: int)
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-               Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
                  Dp Sort State: PARTITION_SORTED
@@ -2932,7 +2802,6 @@ STAGE PLANS:
                        key expressions: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int)
                        sort order: +++
                        Map-reduce partition columns: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int)
-                       Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE
                        value expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: float)
            Execution mode: llap
            LLAP IO: no inputs
@@ -2942,7 +2811,6 @@ STAGE PLANS:
              Select Operator
                expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: bigint), VALUE._col2 (type: float), KEY._col3 (type: string), KEY._col4 (type: tinyint), KEY._col5 (type: int)
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-               Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
                  Dp Sort State: PARTITION_SORTED
@@ -3016,7 +2884,6 @@ STAGE PLANS:
                        key expressions: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int)
                        sort order: +++
                        Map-reduce partition columns: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int)
-                       Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE
                        value expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: float)
            Execution mode: llap
            LLAP IO: no inputs
@@ -3026,7 +2893,6 @@ STAGE PLANS:
              Select Operator
                expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: bigint), VALUE._col2 (type: float), KEY._col3 (type: string), KEY._col4 (type: tinyint), KEY._col5 (type: int)
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-               Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
                  Dp Sort State: PARTITION_SORTED
@@ -3100,7 +2966,6 @@ STAGE PLANS:
                        key expressions: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int)
                        sort order: +++
                        Map-reduce partition columns: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int)
-                       Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE
                        value expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: float)
            Execution mode: llap
            LLAP IO: no inputs
@@ -3110,7 +2975,6 @@ STAGE PLANS:
              Select Operator
                expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: bigint), VALUE._col2 (type: float), KEY._col3 (type: string), KEY._col4 (type: tinyint), KEY._col5 (type: int)
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-               Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
                  Dp Sort State: PARTITION_SORTED
@@ -3584,3 +3448,471 @@ POSTHOOK: Input: default@over1k_part3@s=wendy van buren/t=27/i=65680
POSTHOOK: Input: default@over1k_part3@s=xavier quirinius/t=27/i=65599
#### A masked pattern was here ####
 17814641134
+PREHOOK: query: drop table over1k_n3
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@over1k_n3
+PREHOOK: Output: default@over1k_n3
+POSTHOOK: query: drop table over1k_n3
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@over1k_n3
+POSTHOOK: Output: default@over1k_n3
+PREHOOK: query: create table over1k_n3(
+  t tinyint,
+  si smallint,
+  i int,
+  b bigint,
+  f float,
+  d double,
+  bo boolean,
+  s string,
+  ts timestamp,
+  `dec` decimal(4,2),
+  bin binary)
+  row format delimited
+  fields terminated by '|'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@over1k_n3
+POSTHOOK: query: create table over1k_n3(
+  t tinyint,
+  si smallint,
+  i int,
+  b bigint,
+  f float,
+  d double,
+  bo boolean,
+  s string,
+  ts timestamp,
+  `dec` decimal(4,2),
+  bin binary)
+  row format delimited
+  fields terminated by '|'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@over1k_n3
+PREHOOK: query: load data local inpath '../../data/files/over1k' into table over1k_n3
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@over1k_n3
+POSTHOOK: query: load data local inpath '../../data/files/over1k' into table over1k_n3
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@over1k_n3
+PREHOOK: query: analyze table over1k_n3 compute statistics for columns
+PREHOOK: type: ANALYZE_TABLE
+PREHOOK: Input: default@over1k_n3
+PREHOOK: Output: default@over1k_n3
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table over1k_n3 compute statistics for columns
+POSTHOOK: type: ANALYZE_TABLE
+POSTHOOK: Input: default@over1k_n3
+POSTHOOK: Output: default@over1k_n3
+#### A masked pattern was here ####
+PREHOOK: query: explain insert overwrite table over1k_part partition(ds="foo", t) select si,i,b,f,t from over1k_n3 where t is null or t>27
+PREHOOK: type: QUERY
+PREHOOK: Input: default@over1k_n3
+PREHOOK: Output: default@over1k_part@ds=foo
+POSTHOOK: query: explain insert overwrite table over1k_part partition(ds="foo", t) select si,i,b,f,t from over1k_n3 where t is null or t>27
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@over1k_n3
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+  Stage-3 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: over1k_n3
+                  filterExpr: (t is null or (t > 27Y)) (type: boolean)
+                  Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: ((t > 27Y) or t is null) (type: boolean)
+                    Statistics: Num rows: 352 Data size: 8448 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint)
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                      Statistics: Num rows: 352 Data size: 8448 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col4 (type: tinyint)
+                        sort order: +
+                        Map-reduce partition columns: _col4 (type: tinyint)
+                        value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Reducer 2
+            Execution mode: llap
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                File Output Operator
+                  compressed: false
+                  Dp Sort State: PARTITION_SORTED
+                  Statistics: Num rows: 352 Data size: 8448 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      name: default.over1k_part
+
+  Stage: Stage-2
+    Dependency Collection
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          partition:
+            ds foo
+            t 
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.over1k_part
+
+  Stage: Stage-3
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: si, i, b, f
+          Column Types: smallint, int, bigint, float
+          Table: default.over1k_part
+
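(Editorial aside, not part of the patch: the EXPLAIN above gets the rewrite, a single reducer with "Dp Sort State: PARTITION_SORTED", while the EXPLAIN that follows adds LIMIT 10 and does not, keeping a plain File Output Operator plus a compute_stats reducer; the third EXPLAIN then repeats the first query without the rewrite at all. The queries below are quoted from the test output; any SET statement that toggles the optimization between runs is assumed, since it is not visible in this hunk.)

    -- quoted from the tests in this file; the contrast is in the plans
    explain insert overwrite table over1k_part partition(ds="foo", t)
    select si,i,b,f,t from over1k_n3 where t is null or t>27;
    explain insert overwrite table over1k_part partition(ds="foo", t)
    select si,i,b,f,t from over1k_n3 where t is null or t=27 limit 10;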
+PREHOOK: query: explain insert overwrite table over1k_part partition(ds="foo", t) select si,i,b,f,t from over1k_n3 where t is null or t=27 limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@over1k_n3
+PREHOOK: Output: default@over1k_part@ds=foo
+POSTHOOK: query: explain insert overwrite table over1k_part partition(ds="foo", t) select si,i,b,f,t from over1k_n3 where t is null or t=27 limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@over1k_n3
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+  Stage-3 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: over1k_n3
+                  filterExpr: (t is null or (t = 27Y)) (type: boolean)
+                  Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: ((t = 27Y) or t is null) (type: boolean)
+                    Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint)
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                      Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
+                      Limit
+                        Number of rows: 10
+                        Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
+                          value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Reducer 2
+            Execution mode: llap
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), VALUE._col4 (type: tinyint)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
+                Limit
+                  Number of rows: 10
+                  Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        name: default.over1k_part
+                  Select Operator
+                    expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), 'foo' (type: string), _col4 (type: tinyint)
+                    outputColumnNames: si, i, b, f, ds, t
+                    Statistics: Num rows: 10 Data size: 1110 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll')
+                      keys: ds (type: string), t (type: tinyint)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                      Statistics: Num rows: 5 Data size: 8935 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: tinyint)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint)
+                        Statistics: Num rows: 5 Data size: 8935 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct)
+        Reducer 3
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+                keys: KEY._col0 (type: string), KEY._col1 (type: tinyint)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                  Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-2
+    Dependency Collection
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          partition:
+            ds foo
+            t
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.over1k_part
+
+  Stage: Stage-3
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: si, i, b, f
+          Column Types: smallint, int, bigint, float
+          Table: default.over1k_part
+
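In this second EXPLAIN the cost-based decision goes the other way: LIMIT 10 caps the insert at ten rows, so only a handful of writers could ever be open at once, and the plan keeps a plain File Output Operator with no Dp Sort State; the extra Reducer 3 only merges column statistics. A sketch under the same assumption about the session setting:

    -- assumed: with the threshold at its cost-based default, the tiny row
    -- count makes the extra sort-shuffle not worth it
    set hive.optimize.sort.dynamic.partition.threshold=0;
    explain insert overwrite table over1k_part partition(ds="foo", t)
    select si, i, b, f, t from over1k_n3 where t is null or t = 27 limit 10;
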
+PREHOOK: query: explain insert overwrite table over1k_part partition(ds="foo", t) select si,i,b,f,t from over1k_n3 where t is null or t>27
+PREHOOK: type: QUERY
+PREHOOK: Input: default@over1k_n3
+PREHOOK: Output: default@over1k_part@ds=foo
+POSTHOOK: query: explain insert overwrite table over1k_part partition(ds="foo", t) select si,i,b,f,t from over1k_n3 where t is null or t>27
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@over1k_n3
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+  Stage-3 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: over1k_n3
+                  filterExpr: (t is null or (t > 27Y)) (type: boolean)
+                  Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: ((t > 27Y) or t is null) (type: boolean)
+                    Statistics: Num rows: 352 Data size: 8448 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint)
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                      Statistics: Num rows: 352 Data size: 8448 Basic stats: COMPLETE Column stats: COMPLETE
+                      File Output Operator
+                        compressed: false
+                        Statistics: Num rows: 352 Data size: 8448 Basic stats: COMPLETE Column stats: COMPLETE
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                            name: default.over1k_part
+                      Select Operator
+                        expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), 'foo' (type: string), _col4 (type: tinyint)
+                        outputColumnNames: si, i, b, f, ds, t
+                        Statistics: Num rows: 352 Data size: 39072 Basic stats: COMPLETE Column stats: COMPLETE
+                        Group By Operator
+                          aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll')
+                          keys: ds (type: string), t (type: tinyint)
+                          mode: hash
+                          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                          Statistics: Num rows: 129 Data size: 230523 Basic stats: COMPLETE Column stats: COMPLETE
+                          Reduce Output Operator
+                            key expressions: _col0 (type: string), _col1 (type: tinyint)
+                            sort order: ++
+                            Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint)
+                            Statistics: Num rows: 129 Data size: 230523 Basic stats: COMPLETE Column stats: COMPLETE
+                            value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Reducer 2
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+                keys: KEY._col0 (type: string), KEY._col1 (type: tinyint)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                Statistics: Num rows: 128 Data size: 236928 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                  Statistics: Num rows: 128 Data size: 236928 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 128 Data size: 236928 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-2
+    Dependency Collection
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          partition:
+            ds foo
+            t
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.over1k_part
+
+  Stage: Stage-3
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: si, i, b, f
+          Column Types: smallint, int, bigint, float
+          Table: default.over1k_part
+
+PREHOOK: query: explain insert overwrite table over1k_part partition(ds="foo", t) select si,i,b,f,t from over1k_n3 where t is null or t=27 limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@over1k_n3
+PREHOOK: Output: default@over1k_part@ds=foo
+POSTHOOK: query: explain insert overwrite table over1k_part partition(ds="foo", t) select si,i,b,f,t from over1k_n3 where t is null or t=27 limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@over1k_n3
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+  Stage-3 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: over1k_n3
+                  filterExpr: (t is null or (t = 27Y)) (type: boolean)
+                  Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: ((t = 27Y) or t is null) (type: boolean)
+                    Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint)
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                      Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
+                      Limit
+                        Number of rows: 10
+                        Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
+                          value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Reducer 2
+            Execution mode: llap
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), VALUE._col4 (type: tinyint)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
+                Limit
+                  Number of rows: 10
+                  Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: _col4 (type: tinyint)
+                    sort order: +
+                    Map-reduce partition columns: _col4 (type: tinyint)
+                    value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float)
+        Reducer 3
+            Execution mode: llap
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                File Output Operator
+                  compressed: false
+                  Dp Sort State: PARTITION_SORTED
+                  Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      name: default.over1k_part
+
+  Stage: Stage-2
+    Dependency Collection
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          partition:
+            ds foo
+            t
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.over1k_part
+
+  Stage: Stage-3
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: si, i, b, f
+          Column Types: smallint, int, bigint, float
+          Table: default.over1k_part
+
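The last two EXPLAINs invert both decisions: the unrestricted t > 27 insert loses the sorted-partition reducer (a map-side File Output Operator writes directly), while the LIMIT 10 insert gains an extra Reducer 3 with Dp Sort State: PARTITION_SORTED. That is the pattern that forcing the feature off and then on would produce; the actual set commands are outside this excerpt, so the toggles below are a presumed sketch:

    -- assumed: -1 disables the rewrite outright, 1 forces it on
    set hive.optimize.sort.dynamic.partition.threshold=-1;
    explain insert overwrite table over1k_part partition(ds="foo", t)
    select si, i, b, f, t from over1k_n3 where t is null or t > 27;

    set hive.optimize.sort.dynamic.partition.threshold=1;
    explain insert overwrite table over1k_part partition(ds="foo", t)
    select si, i, b, f, t from over1k_n3 where t is null or t = 27 limit 10;
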
diff --git a/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization2.q.out b/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization2.q.out
index d953eebefd..27cce0a8b3 100644
--- a/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization2.q.out
+++ b/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization2.q.out
@@ -87,6 +87,7 @@ STAGE PLANS:
 #### A masked pattern was here ####
       Edges:
         Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1
@@ -122,15 +123,26 @@ STAGE PLANS:
                 expressions: _col1 (type: float), _col2 (type: float), _col0 (type: int)
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Dp Sort State: PARTITION_SORTED
-                  Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.TextInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                      name: default.ss_part
+                Reduce Output Operator
+                  key expressions: _col2 (type: int)
+                  sort order: +
+                  Map-reduce partition columns: _col2 (type: int)
+                  value expressions: _col0 (type: float), _col1 (type: float)
+        Reducer 3
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: float), VALUE._col1 (type: float), KEY._col2 (type: int)
+                outputColumnNames: _col0, _col1, _col2
+                File Output Operator
+                  compressed: false
+                  Dp Sort State: PARTITION_SORTED
+                  Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      name: default.ss_part
 
     Stage: Stage-2
       Dependency Collection
@@ -335,6 +347,7 @@ STAGE PLANS:
 #### A masked pattern was here ####
       Edges:
         Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1
@@ -365,6 +378,17 @@ STAGE PLANS:
                 expressions: VALUE._col0 (type: float), VALUE._col1 (type: float), VALUE._col2 (type: int)
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col2 (type: int)
+                  sort order: +
+                  Map-reduce partition columns: _col2 (type: int)
+                  value expressions: _col0 (type: float), _col1 (type: float)
+        Reducer 3
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: float), VALUE._col1 (type: float), KEY._col2 (type: int)
+                outputColumnNames: _col0, _col1, _col2
                 File Output Operator
                   compressed: false
                   Dp Sort State: PARTITION_SORTED
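Both hunks in dynpart_sort_optimization2.q.out make the same shape change: instead of writing PARTITION_SORTED output directly from the preceding vertex, the plan now inserts a shuffle keyed on the dynamic partition column _col2 and a dedicated Reducer 3 that owns the sorted write into ss_part. The query text is outside this excerpt; a hypothetical statement with the matching shape, two float measures plus an int partition key, would look like:

    -- illustrative only; column and source-table names are assumptions
    explain insert overwrite table ss_part partition (ss_sold_date_sk)
    select ss_net_paid_inc_tax, ss_net_profit, ss_sold_date_sk from ss_src;
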
diff --git a/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid.q.out b/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid.q.out
index 5fd1bf6fb2..e304a0e725 100644
--- a/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid.q.out
+++ b/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid.q.out
@@ -512,23 +512,23 @@ STAGE PLANS:
                 outputColumnNames: _col0, _col3
                 Statistics: Num rows: 5 Data size: 2170 Basic stats: COMPLETE Column stats: PARTIAL
                 Reduce Output Operator
-                  key expressions: _col3 (type: string), _bucket_number (type: string), _col0 (type: struct)
-                  sort order: +++
-                  Map-reduce partition columns: _col3 (type: string)
+                  key expressions: _col0 (type: struct)
+                  sort order: +
+                  Map-reduce partition columns: UDFToInteger(_col0) (type: int)
                   Statistics: Num rows: 5 Data size: 2170 Basic stats: COMPLETE Column stats: PARTIAL
+                  value expressions: _col3 (type: string)
             Execution mode: llap
             LLAP IO: may be used (ACID table)
         Reducer 2
             Execution mode: llap
            Reduce Operator Tree:
              Select Operator
-                expressions: KEY._col0 (type: struct), 'foo' (type: string), 'bar' (type: string), KEY._col3 (type: string), KEY._bucket_number (type: string)
-                outputColumnNames: _col0, _col1, _col2, _col3, _bucket_number
-                Statistics: Num rows: 5 Data size: 2220 Basic stats: COMPLETE Column stats: PARTIAL
+                expressions: KEY.reducesinkkey0 (type: struct), 'foo' (type: string), 'bar' (type: string), VALUE._col1 (type: string)
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 5 Data size: 2170 Basic stats: COMPLETE Column stats: PARTIAL
                 File Output Operator
                   compressed: false
-                  Dp Sort State: PARTITION_BUCKET_SORTED
-                  Statistics: Num rows: 5 Data size: 2220 Basic stats: COMPLETE Column stats: PARTIAL
+                  Statistics: Num rows: 5 Data size: 2170 Basic stats: COMPLETE Column stats: PARTIAL
                   table:
                       input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                       output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -1283,23 +1283,23 @@ STAGE PLANS:
                 outputColumnNames: _col0, _col4
                 Statistics: Num rows: 5 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL
                 Reduce Output Operator
-                  key expressions: '2008-04-08' (type: string), _col4 (type: int), _bucket_number (type: string), _col0 (type: struct)
-                  sort order: ++++
-                  Map-reduce partition columns: '2008-04-08' (type: string), _col4 (type: int)
+                  key expressions: _col0 (type: struct)
+                  sort order: +
+                  Map-reduce partition columns: UDFToInteger(_col0) (type: int)
                   Statistics: Num rows: 5 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL
+                  value expressions: _col4 (type: int)
             Execution mode: llap
             LLAP IO: may be used (ACID table)
         Reducer 2
             Execution mode: llap
            Reduce Operator Tree:
              Select Operator
-                expressions: KEY._col0 (type: struct), 'foo' (type: string), 'bar' (type: string), '2008-04-08' (type: string), KEY._col4 (type: int), KEY._bucket_number (type: string)
-                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _bucket_number
-                Statistics: Num rows: 5 Data size: 1790 Basic stats: COMPLETE Column stats: PARTIAL
+                expressions: KEY.reducesinkkey0 (type: struct), 'foo' (type: string), 'bar' (type: string), '2008-04-08' (type: string), VALUE._col2 (type: int)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                Statistics: Num rows: 5 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL
                 File Output Operator
                   compressed: false
-                  Dp Sort State: PARTITION_BUCKET_SORTED
-                  Statistics: Num rows: 5 Data size: 1790 Basic stats: COMPLETE Column stats: PARTIAL
+                  Statistics: Num rows: 5 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL
                   table:
                       input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                       output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -1419,7 +1419,7 @@ STAGE PLANS:
                 TableScan
                   alias: acid_2l_part_sdpo
                   filterExpr: (value = 'bar') (type: boolean)
-                  Statistics: Num rows: 4200 Data size: 1253037 Basic stats: COMPLETE Column stats: PARTIAL
+                  Statistics: Num rows: 4200 Data size: 1243317 Basic stats: COMPLETE Column stats: PARTIAL
                   Filter Operator
                     predicate: (value = 'bar') (type: boolean)
                     Statistics: Num rows: 5 Data size: 1375 Basic stats: COMPLETE Column stats: PARTIAL
@@ -1428,23 +1428,23 @@ STAGE PLANS:
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 5 Data size: 1320 Basic stats: COMPLETE Column stats: PARTIAL
                 Reduce Output Operator
-                  key expressions: _col1 (type: string), _col2 (type: int), _bucket_number (type: string), _col0 (type: struct)
-                  sort order: ++++
-                  Map-reduce partition columns: _col1 (type: string), _col2 (type: int)
+                  key expressions: _col0 (type: struct)
+                  sort order: +
+                  Map-reduce partition columns: UDFToInteger(_col0) (type: int)
                   Statistics: Num rows: 5 Data size: 1320 Basic stats: COMPLETE Column stats: PARTIAL
+                  value expressions: _col1 (type: string), _col2 (type: int)
             Execution mode: llap
             LLAP IO: may be used (ACID table)
         Reducer 2
             Execution mode: llap
            Reduce Operator Tree:
              Select Operator
-                expressions: KEY._col0 (type: struct), KEY._col1 (type: string), KEY._col2 (type: int), KEY._bucket_number (type: string)
-                outputColumnNames: _col0, _col1, _col2, _bucket_number
-                Statistics: Num rows: 5 Data size: 2240 Basic stats: COMPLETE Column stats: PARTIAL
+                expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: string), VALUE._col1 (type: int)
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 5 Data size: 1320 Basic stats: COMPLETE Column stats: PARTIAL
                 File Output Operator
                   compressed: false
-                  Dp Sort State: PARTITION_BUCKET_SORTED
-                  Statistics: Num rows: 5 Data size: 2240 Basic stats: COMPLETE Column stats: PARTIAL
+                  Statistics: Num rows: 5 Data size: 1320 Basic stats: COMPLETE Column stats: PARTIAL
                   table:
                       input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                       output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -1633,24 +1633,23 @@ STAGE PLANS:
                 outputColumnNames: _col0, _col1, _col3, _col4
                 Statistics: Num rows: 5 Data size: 2675 Basic stats: COMPLETE Column stats: PARTIAL
                 Reduce Output Operator
-                  key expressions: _col3 (type: string), _col4 (type: int), _bucket_number (type: string), _col0 (type: struct)
-                  sort order: ++++
-                  Map-reduce partition columns: _col3 (type: string), _col4 (type: int)
+                  key expressions: _col0 (type: struct)
+                  sort order: +
+                  Map-reduce partition columns: UDFToInteger(_col0) (type: int)
                   Statistics: Num rows: 5 Data size: 2675 Basic stats: COMPLETE Column stats: PARTIAL
-                  value expressions: _col1 (type: string), 'bar' (type: string)
+                  value expressions: _col1 (type: string), _col3 (type: string), _col4 (type: int)
             Execution mode: llap
             LLAP IO: may be used (ACID table)
         Reducer 2
             Execution mode: llap
            Reduce Operator Tree:
              Select Operator
-                expressions: KEY._col0 (type: struct), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: int), KEY._bucket_number (type: string)
-                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _bucket_number
-                Statistics: Num rows: 5 Data size: 3595 Basic stats: COMPLETE Column stats: PARTIAL
+                expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: string), 'bar' (type: string), VALUE._col1 (type: string), VALUE._col2 (type: int)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                Statistics: Num rows: 5 Data size: 2675 Basic stats: COMPLETE Column stats: PARTIAL
                 File Output Operator
                   compressed: false
-                  Dp Sort State: PARTITION_BUCKET_SORTED
-                  Statistics: Num rows: 5 Data size: 3595 Basic stats: COMPLETE Column stats: PARTIAL
+                  Statistics: Num rows: 5 Data size: 2675 Basic stats: COMPLETE Column stats: PARTIAL
                   table:
                       input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                       output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -1742,24 +1742,23 @@ STAGE PLANS:
                 outputColumnNames: _col0, _col1, _col3, _col4
                 Statistics: Num rows: 5 Data size: 2675 Basic stats: COMPLETE Column stats: PARTIAL
                 Reduce Output Operator
-                  key expressions: _col3 (type: string), _col4 (type: int), _bucket_number (type: string), _col0 (type: struct)
-                  sort order: ++++
-                  Map-reduce partition columns: _col3 (type: string), _col4 (type: int)
+                  key expressions: _col0 (type: struct)
+                  sort order: +
+                  Map-reduce partition columns: UDFToInteger(_col0) (type: int)
                   Statistics: Num rows: 5 Data size: 2675 Basic stats: COMPLETE Column stats: PARTIAL
-                  value expressions: _col1 (type: string), 'bar' (type: string)
+                  value expressions: _col1 (type: string), _col3 (type: string), _col4 (type: int)
             Execution mode: llap
             LLAP IO: may be used (ACID table)
         Reducer 2
             Execution mode: llap
            Reduce Operator Tree:
              Select Operator
-                expressions: KEY._col0 (type: struct), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: int), KEY._bucket_number (type: string)
-                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _bucket_number
-                Statistics: Num rows: 5 Data size: 3595 Basic stats: COMPLETE Column stats: PARTIAL
+                expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: string), 'bar' (type: string), VALUE._col1 (type: string), VALUE._col2 (type: int)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                Statistics: Num rows: 5 Data size: 2675 Basic stats: COMPLETE Column stats: PARTIAL
                 File Output Operator
                   compressed: false
-                  Dp Sort State: PARTITION_BUCKET_SORTED
-                  Statistics: Num rows: 5 Data size: 3595 Basic stats: COMPLETE Column stats: PARTIAL
+                  Statistics: Num rows: 5 Data size: 2675 Basic stats: COMPLETE Column stats: PARTIAL
                   table:
                       input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                       output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
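The ACID update plans move in the opposite direction: the old shuffle keyed on (partition columns, _bucket_number, ROW__ID) and wrote with Dp Sort State: PARTITION_BUCKET_SORTED, while the new one keys on the ROW__ID struct alone, routes rows by UDFToInteger(_col0) (the bucket id taken from ROW__ID), carries the partition columns as plain values, and drops the Dp Sort State entirely. A hypothetical update of the shape these hunks cover, against the acid_2l_part_sdpo table named in the plan:

    -- illustrative only; the real test statement is outside this excerpt
    update acid_2l_part_sdpo set value = 'updated' where value = 'bar';
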
diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_partitioned_3.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_partitioned_3.q.out
index 2c838b6a15..4ed30ee8a0 100644
--- a/ql/src/test/results/clientpositive/llap/materialized_view_partitioned_3.q.out
+++ b/ql/src/test/results/clientpositive/llap/materialized_view_partitioned_3.q.out
@@ -60,7 +60,6 @@ STAGE PLANS:
                   key expressions: _col1 (type: string)
                   sort order: +
                   Map-reduce partition columns: _col1 (type: string)
-                  Statistics: Num rows: 55 Data size: 19268 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col0 (type: string)
             Execution mode: vectorized, llap
             LLAP IO: may be used (ACID table)
@@ -70,7 +69,6 @@ STAGE PLANS:
               Select Operator
                 expressions: VALUE._col0 (type: string), KEY._col1 (type: string)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 55 Data size: 19268 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
                   Dp Sort State: PARTITION_SORTED
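By comparison the materialized_view_partitioned_3.q.out change is cosmetic: the rebuild keeps its PARTITION_SORTED write, and only two Statistics lines disappear from the shuffle and the reducer-side Select. The plan shape, one value column shuffled on a string partition key, matches a partitioned materialized view along these lines (hypothetical DDL; the real one is outside this excerpt):

    -- illustrative only; view, table, and column names are assumptions
    create materialized view partition_mv_sdp partitioned on (key) as
    select value, key from src_txn where key > 200 and key < 250;
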
diff --git a/ql/src/test/results/clientpositive/llap/mm_dp.q.out b/ql/src/test/results/clientpositive/llap/mm_dp.q.out
index b23a3d720b..6ad05e9ed1 100644
--- a/ql/src/test/results/clientpositive/llap/mm_dp.q.out
+++ b/ql/src/test/results/clientpositive/llap/mm_dp.q.out
@@ -149,7 +149,6 @@ STAGE PLANS:
                   key expressions: _col3 (type: string), _bucket_number (type: string), _col2 (type: int)
                   sort order: +++
                   Map-reduce partition columns: _col3 (type: string)
-                  Statistics: Num rows: 2605 Data size: 268315 Basic stats: COMPLETE Column stats: COMPLETE
                   value expressions: _col0 (type: int), _col1 (type: int)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
@@ -159,11 +158,10 @@ STAGE PLANS:
               Select Operator
                 expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), KEY._col2 (type: int), KEY._col3 (type: string), KEY._bucket_number (type: string)
                 outputColumnNames: _col0, _col1, _col2, _col3, _bucket_number
-                Statistics: Num rows: 2605 Data size: 747635 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
                   Dp Sort State: PARTITION_BUCKET_SORTED
-                  Statistics: Num rows: 2605 Data size: 747635 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 2605 Data size: 268315 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                       output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
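Finally, mm_dp.q.out confirms the bucketed micromanaged-table path keeps its shape: the shuffle still keys on (partition column, _bucket_number, sort column) and the writer stays PARTITION_BUCKET_SORTED; only the Statistics placement changes, with the File Output Operator now reporting the pre-projection data size. A hypothetical table and insert matching the plan's column layout (two int values, an int bucket/sort key, a string dynamic partition):

    -- illustrative only; names and bucket count are assumptions
    create table dp_mm (a int, b int, c int) partitioned by (d string)
      clustered by (c) sorted by (c) into 4 buckets
      stored as orc
      tblproperties ('transactional'='true', 'transactional_properties'='insert_only');
    insert into table dp_mm partition (d) select a, b, c, d from dp_src;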