diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java index 498877abb2..6fd1093e6d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java @@ -188,8 +188,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, // unlink connection between FS and its parent fsParent = fsOp.getParentOperators().get(0); - fsParent.getChildOperators().clear(); - + fsParent.getChildOperators().remove(fsOp); // if enforce bucketing/sorting is disabled numBuckets will not be set. // set the number of buckets here to ensure creation of empty buckets @@ -399,7 +398,7 @@ private boolean removeRSInsertedByEnforceBucketing(FileSinkOperator fsOp) { return false; } } - rsParent.getChildOperators().clear(); + rsParent.getChildOperators().remove(rsToRemove); rsParent.getChildOperators().add(rsGrandChild); rsGrandChild.getParentOperators().clear(); rsGrandChild.getParentOperators().add(rsParent); diff --git a/ql/src/test/queries/clientpositive/dynpart_sort_optimization.q b/ql/src/test/queries/clientpositive/dynpart_sort_optimization.q index dbeb874019..03bf10bc5e 100644 --- a/ql/src/test/queries/clientpositive/dynpart_sort_optimization.q +++ b/ql/src/test/queries/clientpositive/dynpart_sort_optimization.q @@ -245,4 +245,36 @@ explain insert overwrite table over1k_part partition(ds="foo", t) select si,i,b, set hive.optimize.sort.dynamic.partition.threshold=1; explain insert overwrite table over1k_part partition(ds="foo", t) select si,i,b,f,t from over1k_n3 where t is null or t=27 limit 10; + +create table over1k_part4_0(i int) partitioned by (s string); +create table over1k_part4_1(i int) partitioned by (s string); + +EXPLAIN +WITH CTE AS ( +select i, s from over1k_n3 where s like 'bob%' +) +FROM ( +select * from CTE where i > 1 ORDER BY s +) src1k +insert overwrite table over1k_part4_0 partition(s) +select i+1, s +insert overwrite table over1k_part4_1 partition(s) +select i+0, s +; + +WITH CTE AS ( +select i, s from over1k_n3 where s like 'bob%' +) +FROM ( +select * from CTE where i > 1 ORDER BY s +) src1k +insert overwrite table over1k_part4_0 partition(s) +select i+1, s +insert overwrite table over1k_part4_1 partition(s) +select i+0, s +; + +select count(1) from over1k_part4_0; +select count(1) from over1k_part4_1; + drop table over1k_n3; diff --git a/ql/src/test/results/clientpositive/autoColumnStats_1.q.out b/ql/src/test/results/clientpositive/autoColumnStats_1.q.out index bcabc02a4b..4131535972 100644 --- a/ql/src/test/results/clientpositive/autoColumnStats_1.q.out +++ b/ql/src/test/results/clientpositive/autoColumnStats_1.q.out @@ -1389,11 +1389,11 @@ STAGE PLANS: Processor Tree: TableScan alias: a_n12 - Statistics: Num rows: 2000 Data size: 371032 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 2000 Data size: 371032 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain select value from b_n9 @@ -1416,11 +1416,11 @@ STAGE PLANS: Processor Tree: TableScan alias: b_n9 - Statistics: Num rows: 1000 Data size: 185608 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: value (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 185608 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain select key from b_n9 @@ -1443,11 +1443,11 @@ STAGE PLANS: Processor Tree: TableScan alias: b_n9 - Statistics: Num rows: 1000 Data size: 185608 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 185608 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain select value from c_n2 @@ -1472,11 +1472,11 @@ STAGE PLANS: Processor Tree: TableScan alias: c_n2 - Statistics: Num rows: 2000 Data size: 371032 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: value (type: string) outputColumnNames: _col0 - Statistics: Num rows: 2000 Data size: 371032 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain select key from c_n2 @@ -1501,10 +1501,10 @@ STAGE PLANS: Processor Tree: TableScan alias: c_n2 - Statistics: Num rows: 2000 Data size: 371032 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 2000 Data size: 371032 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE ListSink diff --git a/ql/src/test/results/clientpositive/autoColumnStats_2.q.out b/ql/src/test/results/clientpositive/autoColumnStats_2.q.out index 3618b02f8e..a8371236e7 100644 --- a/ql/src/test/results/clientpositive/autoColumnStats_2.q.out +++ b/ql/src/test/results/clientpositive/autoColumnStats_2.q.out @@ -1519,11 +1519,11 @@ STAGE PLANS: Processor Tree: TableScan alias: a_n3 - Statistics: Num rows: 2000 Data size: 371032 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 2000 Data size: 371032 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain select value from b_n3 @@ -1546,11 +1546,11 @@ STAGE PLANS: Processor Tree: TableScan alias: b_n3 - Statistics: Num rows: 1000 Data size: 185608 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: value (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 185608 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain select key from b_n3 @@ -1573,11 +1573,11 @@ STAGE PLANS: Processor Tree: TableScan alias: b_n3 - Statistics: Num rows: 1000 Data size: 185608 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 185608 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain select value from c_n1 @@ -1602,11 +1602,11 @@ STAGE PLANS: Processor Tree: TableScan alias: c_n1 - Statistics: Num rows: 2000 Data size: 371032 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: value (type: string) outputColumnNames: _col0 - Statistics: Num rows: 2000 Data size: 371032 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain select key from c_n1 @@ -1631,10 +1631,10 @@ STAGE PLANS: Processor Tree: TableScan alias: c_n1 - Statistics: Num rows: 2000 Data size: 371032 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 2000 Data size: 371032 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE ListSink diff --git a/ql/src/test/results/clientpositive/autoColumnStats_6.q.out b/ql/src/test/results/clientpositive/autoColumnStats_6.q.out index 2c2baf1f0a..ad18a805fc 100644 --- a/ql/src/test/results/clientpositive/autoColumnStats_6.q.out +++ b/ql/src/test/results/clientpositive/autoColumnStats_6.q.out @@ -29,8 +29,9 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 @@ -43,11 +44,56 @@ STAGE PLANS: expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 10) (type: int), (hash(value) pmod 10) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: int), _col3 (type: int) - sort order: ++ - Map-reduce partition columns: _col2 (type: int), _col3 (type: int) - value expressions: _col0 (type: int), _col1 (type: string) + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), CAST( _col2 AS STRING) (type: string), CAST( _col3 AS STRING) (type: string) + outputColumnNames: key, value, one, two, three + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: one (type: string), two (type: string), three (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: struct), _col4 (type: struct) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: struct), _col4 (type: struct), _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: int), _col3 (type: int) + sort order: ++ + Map-reduce partition columns: _col2 (type: int), _col3 (type: int) + value expressions: _col0 (type: int), _col1 (type: string) Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -77,7 +123,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge2a - Stage: Stage-2 + Stage: Stage-3 Stats Work Basic Stats Work: Column Stats Desc: diff --git a/ql/src/test/results/clientpositive/autoColumnStats_8.q.out b/ql/src/test/results/clientpositive/autoColumnStats_8.q.out index d0c660219f..3c1fb82d18 100644 --- a/ql/src/test/results/clientpositive/autoColumnStats_8.q.out +++ b/ql/src/test/results/clientpositive/autoColumnStats_8.q.out @@ -57,11 +57,13 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-2 + Stage-7 depends on stages: Stage-1, Stage-5 + Stage-6 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-2 @@ -79,14 +81,43 @@ STAGE PLANS: expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col3 (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: _col2 (type: string), _col3 (type: string) - tag: -1 - value expressions: _col0 (type: string), _col1 (type: string) - auto parallelism: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types string,string,string,string + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Filter Operator isSamplingPred: false predicate: (ds > '2008-04-08') (type: boolean) @@ -95,6 +126,34 @@ STAGE PLANS: expressions: key (type: string), value (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, hr + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: '2008-12-31' (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types string,string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false File Output Operator compressed: false GlobalTableId: 0 @@ -113,7 +172,6 @@ STAGE PLANS: TotalFiles: 1 GatherStats: false MultiFileSpray: false - Execution mode: vectorized Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -319,6 +377,82 @@ STAGE PLANS: /srcpart/ds=2008-04-09/hr=11 [srcpart] /srcpart/ds=2008-04-09/hr=12 [srcpart] Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + key expressions: _col2 (type: string), _col3 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col2 (type: string), _col3 (type: string) + tag: -1 + value expressions: _col0 (type: string), _col1 (type: string) + auto parallelism: false + Execution mode: vectorized + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10004 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types string,string,string,string + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types string,string,string,string + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string) @@ -382,12 +516,99 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part8 - Stage: Stage-3 + Stage: Stage-4 Stats Work Basic Stats Work: #### A masked pattern was here #### - Stage: Stage-4 + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + key expressions: '2008-12-31' (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: '2008-12-31' (type: string), _col1 (type: string) + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false + Execution mode: vectorized + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10005 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types string,string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types string,string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: '2008-12-31' (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), '2008-12-31' (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-7 + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part8 + Is Table Level Stats: false + + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -406,7 +627,7 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: -mr-10004 + base file name: -mr-10006 input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: @@ -493,16 +714,6 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part8 - Stage: Stage-5 - Stats Work - Basic Stats Work: -#### A masked pattern was here #### - Column Stats Desc: - Columns: key, value - Column Types: string, string - Table: default.nzhang_part8 - Is Table Level Stats: false - PREHOOK: query: from srcpart insert overwrite table nzhang_part8 partition (ds, hr) select key, value, ds, hr where ds <= '2008-04-08' insert overwrite table nzhang_part8 partition(ds='2008-12-31', hr) select key, value, hr where ds > '2008-04-08' diff --git a/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out b/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out index a9b927b7f3..9ddbb469bc 100644 --- a/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out +++ b/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out @@ -340,12 +340,12 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc_1d_n1 - Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator expressions: state (type: string) outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain extended select state,locid from loc_orc_1d_n1 @@ -561,12 +561,12 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc_1d_n1 - Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: analyze table loc_orc_1d_n1 partition(year='2000') compute statistics for columns state @@ -1031,12 +1031,12 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc_1d_n1 - Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: create table if not exists loc_orc_2d_n1 ( @@ -1681,12 +1681,12 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc_2d_n1 - Statistics: Num rows: 20 Data size: 1760 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator expressions: state (type: string) outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 1760 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain extended select state,locid from loc_orc_2d_n1 @@ -2249,11 +2249,11 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc_2d_n1 - Statistics: Num rows: 20 Data size: 1840 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 1840 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE ListSink diff --git a/ql/src/test/results/clientpositive/infer_bucket_sort_dyn_part.q.out b/ql/src/test/results/clientpositive/infer_bucket_sort_dyn_part.q.out index f865eb96e0..93099a12ba 100644 --- a/ql/src/test/results/clientpositive/infer_bucket_sort_dyn_part.q.out +++ b/ql/src/test/results/clientpositive/infer_bucket_sort_dyn_part.q.out @@ -420,13 +420,14 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 - Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 - Stage-3 depends on stages: Stage-0 - Stage-4 + Stage-4 depends on stages: Stage-0, Stage-2 + Stage-3 depends on stages: Stage-1 + Stage-9 depends on stages: Stage-3 , consists of Stage-6, Stage-5, Stage-7 Stage-6 - Stage-7 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-6, Stage-5, Stage-8 + Stage-5 + Stage-7 + Stage-8 depends on stages: Stage-7 STAGE PLANS: Stage: Stage-1 @@ -464,6 +465,22 @@ STAGE PLANS: expressions: _col0 (type: string), CAST( _col1 AS STRING) (type: string), if(((UDFToDouble(_col0) % 100.0D) = 0.0D), '11', '12') (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe File Output Operator compressed: false table: @@ -472,6 +489,44 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-4 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.test_table_n8 + + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -495,10 +550,10 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.test_table_n8 - Stage: Stage-8 + Stage: Stage-9 Conditional Operator - Stage: Stage-5 + Stage: Stage-6 Move Operator files: hdfs directory: true @@ -517,29 +572,21 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.test_table_n8 - Stage: Stage-3 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key, value - Column Types: string, string - Table: default.test_table_n8 - - Stage: Stage-4 + Stage: Stage-5 Merge File Operator Map Operator Tree: RCFile Merge Operator merge level: block input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - Stage: Stage-6 + Stage: Stage-7 Merge File Operator Map Operator Tree: RCFile Merge Operator merge level: block input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - Stage: Stage-7 + Stage: Stage-8 Move Operator files: hdfs directory: true diff --git a/ql/src/test/results/clientpositive/insert_into6.q.out b/ql/src/test/results/clientpositive/insert_into6.q.out index f13f764b0f..035a29f6b2 100644 --- a/ql/src/test/results/clientpositive/insert_into6.q.out +++ b/ql/src/test/results/clientpositive/insert_into6.q.out @@ -196,8 +196,9 @@ POSTHOOK: Input: default@insert_into6a@ds=1 POSTHOOK: Input: default@insert_into6a@ds=2 STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 @@ -210,11 +211,56 @@ STAGE PLANS: expressions: key (type: int), value (type: string), ds (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 250 Data size: 2680 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string) - sort order: + - Map-reduce partition columns: _col2 (type: string) - value expressions: _col0 (type: int), _col1 (type: string) + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 250 Data size: 2680 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2680 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2680 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 125 Data size: 1340 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 125 Data size: 1340 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 125 Data size: 1340 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) + value expressions: _col0 (type: int), _col1 (type: string) Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -242,7 +288,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into6b - Stage: Stage-2 + Stage: Stage-3 Stats Work Basic Stats Work: Column Stats Desc: diff --git a/ql/src/test/results/clientpositive/llap/dynpart_sort_opt_vectorization.q.out b/ql/src/test/results/clientpositive/llap/dynpart_sort_opt_vectorization.q.out index bd3c7769a4..cf6c335222 100644 --- a/ql/src/test/results/clientpositive/llap/dynpart_sort_opt_vectorization.q.out +++ b/ql/src/test/results/clientpositive/llap/dynpart_sort_opt_vectorization.q.out @@ -252,6 +252,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -277,7 +278,7 @@ STAGE PLANS: Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 - Execution mode: vectorized, llap + Execution mode: llap Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), VALUE._col4 (type: tinyint) @@ -286,12 +287,48 @@ STAGE PLANS: Limit Number of rows: 10 Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), 'foo' (type: string), _col4 (type: tinyint) + outputColumnNames: si, i, b, f, ds, t + Statistics: Num rows: 10 Data size: 1110 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + keys: ds (type: string), t (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 5 Data size: 8935 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: tinyint) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) + Statistics: Num rows: 5 Data size: 8935 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) Reduce Output Operator key expressions: _col4 (type: tinyint) sort order: + Map-reduce partition columns: _col4 (type: tinyint) value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float) Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator @@ -660,6 +697,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -685,7 +723,7 @@ STAGE PLANS: Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 - Execution mode: vectorized, llap + Execution mode: llap Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), VALUE._col4 (type: tinyint) @@ -694,12 +732,48 @@ STAGE PLANS: Limit Number of rows: 10 Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), 'foo' (type: string), _col4 (type: tinyint) + outputColumnNames: si, i, b, f, ds, t + Statistics: Num rows: 10 Data size: 1110 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + keys: ds (type: string), t (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 5 Data size: 8935 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: tinyint) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) + Statistics: Num rows: 5 Data size: 8935 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) Reduce Output Operator key expressions: _col4 (type: tinyint) sort order: + Map-reduce partition columns: _col4 (type: tinyint) value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float) Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator @@ -1572,6 +1646,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1591,7 +1666,7 @@ STAGE PLANS: Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 - Execution mode: vectorized, llap + Execution mode: llap Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), KEY.reducesinkkey0 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float) @@ -1607,12 +1682,48 @@ STAGE PLANS: expressions: _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col0 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), 'foo' (type: string), _col4 (type: tinyint) + outputColumnNames: si, i, b, f, ds, t + Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + keys: ds (type: string), t (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 1787 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: tinyint) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) + Statistics: Num rows: 1 Data size: 1787 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) Reduce Output Operator key expressions: _col4 (type: tinyint) sort order: + Map-reduce partition columns: _col4 (type: tinyint) value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float) Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 1851 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 1851 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1851 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator diff --git a/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization.q.out b/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization.q.out index bf29a112fc..95c3aedc7b 100644 --- a/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization.q.out +++ b/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization.q.out @@ -127,6 +127,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -142,6 +143,22 @@ STAGE PLANS: expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), 'foo' (type: string), _col4 (type: tinyint) + outputColumnNames: si, i, b, f, ds, t + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + keys: ds (type: string), t (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: tinyint) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) Reduce Output Operator key expressions: _col4 (type: tinyint) sort order: + @@ -150,6 +167,26 @@ STAGE PLANS: Execution mode: llap LLAP IO: no inputs Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 Execution mode: llap Reduce Operator Tree: Select Operator @@ -209,6 +246,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -243,12 +281,48 @@ STAGE PLANS: Limit Number of rows: 10 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), 'foo' (type: string), _col4 (type: tinyint) + outputColumnNames: si, i, b, f, ds, t + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + keys: ds (type: string), t (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: tinyint) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) Reduce Output Operator key expressions: _col4 (type: tinyint) sort order: + Map-reduce partition columns: _col4 (type: tinyint) value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float) Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 Execution mode: llap Reduce Operator Tree: Select Operator @@ -535,6 +609,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -550,6 +625,22 @@ STAGE PLANS: expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), 'foo' (type: string), _col4 (type: tinyint) + outputColumnNames: si, i, b, f, ds, t + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + keys: ds (type: string), t (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: tinyint) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) Reduce Output Operator key expressions: _col4 (type: tinyint) sort order: + @@ -558,6 +649,26 @@ STAGE PLANS: Execution mode: llap LLAP IO: no inputs Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 Execution mode: llap Reduce Operator Tree: Select Operator @@ -617,6 +728,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -651,12 +763,48 @@ STAGE PLANS: Limit Number of rows: 10 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), 'foo' (type: string), _col4 (type: tinyint) + outputColumnNames: si, i, b, f, ds, t + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + keys: ds (type: string), t (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: tinyint) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) Reduce Output Operator key expressions: _col4 (type: tinyint) sort order: + Map-reduce partition columns: _col4 (type: tinyint) value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float) Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 Execution mode: llap Reduce Operator Tree: Select Operator @@ -1529,6 +1677,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1564,12 +1713,48 @@ STAGE PLANS: expressions: _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col0 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), 'foo' (type: string), _col4 (type: tinyint) + outputColumnNames: si, i, b, f, ds, t + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + keys: ds (type: string), t (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: tinyint) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) Reduce Output Operator key expressions: _col4 (type: tinyint) sort order: + Map-reduce partition columns: _col4 (type: tinyint) value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float) Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 Execution mode: llap Reduce Operator Tree: Select Operator @@ -2636,6 +2821,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2651,6 +2837,22 @@ STAGE PLANS: expressions: si (type: smallint), b (type: bigint), f (type: float), 'foo' (type: string), t (type: tinyint), i (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: float), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int) + outputColumnNames: si, b, f, s, t, i + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(si, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + keys: s (type: string), t (type: tinyint), i (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) Reduce Output Operator key expressions: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int) sort order: +++ @@ -2659,6 +2861,26 @@ STAGE PLANS: Execution mode: llap LLAP IO: no inputs Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string), KEY._col1 (type: tinyint), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 Execution mode: llap Reduce Operator Tree: Select Operator @@ -2718,6 +2940,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2733,6 +2956,22 @@ STAGE PLANS: expressions: si (type: smallint), b (type: bigint), f (type: float), s (type: string), 27Y (type: tinyint), i (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: float), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int) + outputColumnNames: si, b, f, s, t, i + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(si, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + keys: s (type: string), t (type: tinyint), i (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) Reduce Output Operator key expressions: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int) sort order: +++ @@ -2741,6 +2980,26 @@ STAGE PLANS: Execution mode: llap LLAP IO: no inputs Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string), KEY._col1 (type: tinyint), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 Execution mode: llap Reduce Operator Tree: Select Operator @@ -2800,6 +3059,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2815,16 +3075,52 @@ STAGE PLANS: expressions: si (type: smallint), b (type: bigint), f (type: float), s (type: string), t (type: tinyint), 100 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int) - sort order: +++ - Map-reduce partition columns: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int) - value expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: float) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: + Select Operator + expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: float), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int) + outputColumnNames: si, b, f, s, t, i + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(si, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + keys: s (type: string), t (type: tinyint), i (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Reduce Output Operator + key expressions: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int) + sort order: +++ + Map-reduce partition columns: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int) + value expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: float) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string), KEY._col1 (type: tinyint), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Execution mode: llap + Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: bigint), VALUE._col2 (type: float), KEY._col3 (type: string), KEY._col4 (type: tinyint), KEY._col5 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 @@ -2882,6 +3178,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2897,6 +3194,22 @@ STAGE PLANS: expressions: si (type: smallint), b (type: bigint), f (type: float), s (type: string), 27Y (type: tinyint), 100 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: float), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int) + outputColumnNames: si, b, f, s, t, i + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(si, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + keys: s (type: string), t (type: tinyint), i (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) Reduce Output Operator key expressions: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int) sort order: +++ @@ -2905,6 +3218,26 @@ STAGE PLANS: Execution mode: llap LLAP IO: no inputs Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string), KEY._col1 (type: tinyint), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 Execution mode: llap Reduce Operator Tree: Select Operator @@ -2964,6 +3297,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2979,6 +3313,22 @@ STAGE PLANS: expressions: si (type: smallint), b (type: bigint), f (type: float), 'foo' (type: string), t (type: tinyint), 100 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: float), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int) + outputColumnNames: si, b, f, s, t, i + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(si, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + keys: s (type: string), t (type: tinyint), i (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) Reduce Output Operator key expressions: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int) sort order: +++ @@ -2987,6 +3337,26 @@ STAGE PLANS: Execution mode: llap LLAP IO: no inputs Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string), KEY._col1 (type: tinyint), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 Execution mode: llap Reduce Operator Tree: Select Operator @@ -3046,6 +3416,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -3061,6 +3432,22 @@ STAGE PLANS: expressions: si (type: smallint), b (type: bigint), f (type: float), 'foo' (type: string), 27Y (type: tinyint), i (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: float), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int) + outputColumnNames: si, b, f, s, t, i + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(si, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + keys: s (type: string), t (type: tinyint), i (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) Reduce Output Operator key expressions: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int) sort order: +++ @@ -3069,6 +3456,26 @@ STAGE PLANS: Execution mode: llap LLAP IO: no inputs Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string), KEY._col1 (type: tinyint), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 Execution mode: llap Reduce Operator Tree: Select Operator @@ -3626,6 +4033,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -3641,6 +4049,22 @@ STAGE PLANS: expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 352 Data size: 8448 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), 'foo' (type: string), _col4 (type: tinyint) + outputColumnNames: si, i, b, f, ds, t + Statistics: Num rows: 352 Data size: 39072 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + keys: ds (type: string), t (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 129 Data size: 230523 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: tinyint) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) + Statistics: Num rows: 129 Data size: 230523 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) Reduce Output Operator key expressions: _col4 (type: tinyint) sort order: + @@ -3649,6 +4073,26 @@ STAGE PLANS: Execution mode: llap LLAP IO: no inputs Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 128 Data size: 236928 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 128 Data size: 236928 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 128 Data size: 236928 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 Execution mode: llap Reduce Operator Tree: Select Operator @@ -3708,6 +4152,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -3742,12 +4187,48 @@ STAGE PLANS: Limit Number of rows: 10 Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), 'foo' (type: string), _col4 (type: tinyint) + outputColumnNames: si, i, b, f, ds, t + Statistics: Num rows: 10 Data size: 1110 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + keys: ds (type: string), t (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 5 Data size: 8935 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: tinyint) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) + Statistics: Num rows: 5 Data size: 8935 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) Reduce Output Operator key expressions: _col4 (type: tinyint) sort order: + Map-reduce partition columns: _col4 (type: tinyint) value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float) Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 Execution mode: llap Reduce Operator Tree: Select Operator @@ -3912,6 +4393,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -3946,12 +4428,48 @@ STAGE PLANS: Limit Number of rows: 10 Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), 'foo' (type: string), _col4 (type: tinyint) + outputColumnNames: si, i, b, f, ds, t + Statistics: Num rows: 10 Data size: 1110 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll') + keys: ds (type: string), t (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 5 Data size: 8935 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: tinyint) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) + Statistics: Num rows: 5 Data size: 8935 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) Reduce Output Operator key expressions: _col4 (type: tinyint) sort order: + Map-reduce partition columns: _col4 (type: tinyint) value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float) Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 Execution mode: llap Reduce Operator Tree: Select Operator @@ -3991,6 +4509,452 @@ STAGE PLANS: Column Types: smallint, int, bigint, float Table: default.over1k_part +PREHOOK: query: create table over1k_part4_0(i int) partitioned by (s string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@over1k_part4_0 +POSTHOOK: query: create table over1k_part4_0(i int) partitioned by (s string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@over1k_part4_0 +PREHOOK: query: create table over1k_part4_1(i int) partitioned by (s string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@over1k_part4_1 +POSTHOOK: query: create table over1k_part4_1(i int) partitioned by (s string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@over1k_part4_1 +PREHOOK: query: EXPLAIN +WITH CTE AS ( +select i, s from over1k_n3 where s like 'bob%' +) +FROM ( +select * from CTE where i > 1 ORDER BY s +) src1k +insert overwrite table over1k_part4_0 partition(s) +select i+1, s +insert overwrite table over1k_part4_1 partition(s) +select i+0, s +PREHOOK: type: QUERY +PREHOOK: Input: default@over1k_n3 +PREHOOK: Output: default@over1k_part4_0 +PREHOOK: Output: default@over1k_part4_1 +POSTHOOK: query: EXPLAIN +WITH CTE AS ( +select i, s from over1k_n3 where s like 'bob%' +) +FROM ( +select * from CTE where i > 1 ORDER BY s +) src1k +insert overwrite table over1k_part4_0 partition(s) +select i+1, s +insert overwrite table over1k_part4_1 partition(s) +select i+0, s +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over1k_n3 +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE) + Reducer 5 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over1k_n3 + filterExpr: ((s like 'bob%') and (i > 1)) (type: boolean) + Statistics: Num rows: 1049 Data size: 105949 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((i > 1) and (s like 'bob%')) (type: boolean) + Statistics: Num rows: 524 Data size: 52924 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: i (type: int), s (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 524 Data size: 52924 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: (_col0 + 1) (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 524 Data size: 52924 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: i, s + Statistics: Num rows: 524 Data size: 52924 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(i, 'hll') + keys: s (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 262 Data size: 136502 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 262 Data size: 136502 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct) + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + value expressions: _col0 (type: int) + Select Operator + expressions: (_col0 + 0) (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 524 Data size: 52924 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: i, s + Statistics: Num rows: 524 Data size: 52924 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(i, 'hll') + keys: s (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 262 Data size: 136502 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 262 Data size: 136502 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct) + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + value expressions: _col0 (type: int) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 262 Data size: 140694 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 262 Data size: 140694 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 262 Data size: 140694 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), KEY._col1 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 524 Data size: 52924 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.over1k_part4_0 + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 262 Data size: 140694 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 262 Data size: 140694 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 262 Data size: 140694 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), KEY._col1 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 524 Data size: 52924 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.over1k_part4_1 + + Stage: Stage-3 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: + s + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.over1k_part4_0 + + Stage: Stage-4 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: i + Column Types: int + Table: default.over1k_part4_0 + + Stage: Stage-1 + Move Operator + tables: + partition: + s + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.over1k_part4_1 + + Stage: Stage-5 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: i + Column Types: int + Table: default.over1k_part4_1 + +PREHOOK: query: WITH CTE AS ( +select i, s from over1k_n3 where s like 'bob%' +) +FROM ( +select * from CTE where i > 1 ORDER BY s +) src1k +insert overwrite table over1k_part4_0 partition(s) +select i+1, s +insert overwrite table over1k_part4_1 partition(s) +select i+0, s +PREHOOK: type: QUERY +PREHOOK: Input: default@over1k_n3 +PREHOOK: Output: default@over1k_part4_0 +PREHOOK: Output: default@over1k_part4_1 +POSTHOOK: query: WITH CTE AS ( +select i, s from over1k_n3 where s like 'bob%' +) +FROM ( +select * from CTE where i > 1 ORDER BY s +) src1k +insert overwrite table over1k_part4_0 partition(s) +select i+1, s +insert overwrite table over1k_part4_1 partition(s) +select i+0, s +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over1k_n3 +POSTHOOK: Output: default@over1k_part4_0@s=bob brown +POSTHOOK: Output: default@over1k_part4_0@s=bob carson +POSTHOOK: Output: default@over1k_part4_0@s=bob davidson +POSTHOOK: Output: default@over1k_part4_0@s=bob ellison +POSTHOOK: Output: default@over1k_part4_0@s=bob falkner +POSTHOOK: Output: default@over1k_part4_0@s=bob garcia +POSTHOOK: Output: default@over1k_part4_0@s=bob hernandez +POSTHOOK: Output: default@over1k_part4_0@s=bob ichabod +POSTHOOK: Output: default@over1k_part4_0@s=bob king +POSTHOOK: Output: default@over1k_part4_0@s=bob laertes +POSTHOOK: Output: default@over1k_part4_0@s=bob miller +POSTHOOK: Output: default@over1k_part4_0@s=bob ovid +POSTHOOK: Output: default@over1k_part4_0@s=bob polk +POSTHOOK: Output: default@over1k_part4_0@s=bob quirinius +POSTHOOK: Output: default@over1k_part4_0@s=bob steinbeck +POSTHOOK: Output: default@over1k_part4_0@s=bob van buren +POSTHOOK: Output: default@over1k_part4_0@s=bob white +POSTHOOK: Output: default@over1k_part4_0@s=bob xylophone +POSTHOOK: Output: default@over1k_part4_0@s=bob young +POSTHOOK: Output: default@over1k_part4_0@s=bob zipper +POSTHOOK: Output: default@over1k_part4_1@s=bob brown +POSTHOOK: Output: default@over1k_part4_1@s=bob carson +POSTHOOK: Output: default@over1k_part4_1@s=bob davidson +POSTHOOK: Output: default@over1k_part4_1@s=bob ellison +POSTHOOK: Output: default@over1k_part4_1@s=bob falkner +POSTHOOK: Output: default@over1k_part4_1@s=bob garcia +POSTHOOK: Output: default@over1k_part4_1@s=bob hernandez +POSTHOOK: Output: default@over1k_part4_1@s=bob ichabod +POSTHOOK: Output: default@over1k_part4_1@s=bob king +POSTHOOK: Output: default@over1k_part4_1@s=bob laertes +POSTHOOK: Output: default@over1k_part4_1@s=bob miller +POSTHOOK: Output: default@over1k_part4_1@s=bob ovid +POSTHOOK: Output: default@over1k_part4_1@s=bob polk +POSTHOOK: Output: default@over1k_part4_1@s=bob quirinius +POSTHOOK: Output: default@over1k_part4_1@s=bob steinbeck +POSTHOOK: Output: default@over1k_part4_1@s=bob van buren +POSTHOOK: Output: default@over1k_part4_1@s=bob white +POSTHOOK: Output: default@over1k_part4_1@s=bob xylophone +POSTHOOK: Output: default@over1k_part4_1@s=bob young +POSTHOOK: Output: default@over1k_part4_1@s=bob zipper +POSTHOOK: Lineage: over1k_part4_0 PARTITION(s=bob brown).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_0 PARTITION(s=bob carson).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_0 PARTITION(s=bob davidson).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_0 PARTITION(s=bob ellison).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_0 PARTITION(s=bob falkner).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_0 PARTITION(s=bob garcia).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_0 PARTITION(s=bob hernandez).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_0 PARTITION(s=bob ichabod).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_0 PARTITION(s=bob king).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_0 PARTITION(s=bob laertes).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_0 PARTITION(s=bob miller).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_0 PARTITION(s=bob ovid).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_0 PARTITION(s=bob polk).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_0 PARTITION(s=bob quirinius).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_0 PARTITION(s=bob steinbeck).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_0 PARTITION(s=bob van buren).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_0 PARTITION(s=bob white).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_0 PARTITION(s=bob xylophone).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_0 PARTITION(s=bob young).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_0 PARTITION(s=bob zipper).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_1 PARTITION(s=bob brown).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_1 PARTITION(s=bob carson).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_1 PARTITION(s=bob davidson).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_1 PARTITION(s=bob ellison).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_1 PARTITION(s=bob falkner).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_1 PARTITION(s=bob garcia).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_1 PARTITION(s=bob hernandez).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_1 PARTITION(s=bob ichabod).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_1 PARTITION(s=bob king).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_1 PARTITION(s=bob laertes).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_1 PARTITION(s=bob miller).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_1 PARTITION(s=bob ovid).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_1 PARTITION(s=bob polk).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_1 PARTITION(s=bob quirinius).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_1 PARTITION(s=bob steinbeck).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_1 PARTITION(s=bob van buren).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_1 PARTITION(s=bob white).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_1 PARTITION(s=bob xylophone).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_1 PARTITION(s=bob young).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_1 PARTITION(s=bob zipper).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +PREHOOK: query: select count(1) from over1k_part4_0 +PREHOOK: type: QUERY +PREHOOK: Input: default@over1k_part4_0 +PREHOOK: Input: default@over1k_part4_0@s=bob brown +PREHOOK: Input: default@over1k_part4_0@s=bob carson +PREHOOK: Input: default@over1k_part4_0@s=bob davidson +PREHOOK: Input: default@over1k_part4_0@s=bob ellison +PREHOOK: Input: default@over1k_part4_0@s=bob falkner +PREHOOK: Input: default@over1k_part4_0@s=bob garcia +PREHOOK: Input: default@over1k_part4_0@s=bob hernandez +PREHOOK: Input: default@over1k_part4_0@s=bob ichabod +PREHOOK: Input: default@over1k_part4_0@s=bob king +PREHOOK: Input: default@over1k_part4_0@s=bob laertes +PREHOOK: Input: default@over1k_part4_0@s=bob miller +PREHOOK: Input: default@over1k_part4_0@s=bob ovid +PREHOOK: Input: default@over1k_part4_0@s=bob polk +PREHOOK: Input: default@over1k_part4_0@s=bob quirinius +PREHOOK: Input: default@over1k_part4_0@s=bob steinbeck +PREHOOK: Input: default@over1k_part4_0@s=bob van buren +PREHOOK: Input: default@over1k_part4_0@s=bob white +PREHOOK: Input: default@over1k_part4_0@s=bob xylophone +PREHOOK: Input: default@over1k_part4_0@s=bob young +PREHOOK: Input: default@over1k_part4_0@s=bob zipper +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from over1k_part4_0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over1k_part4_0 +POSTHOOK: Input: default@over1k_part4_0@s=bob brown +POSTHOOK: Input: default@over1k_part4_0@s=bob carson +POSTHOOK: Input: default@over1k_part4_0@s=bob davidson +POSTHOOK: Input: default@over1k_part4_0@s=bob ellison +POSTHOOK: Input: default@over1k_part4_0@s=bob falkner +POSTHOOK: Input: default@over1k_part4_0@s=bob garcia +POSTHOOK: Input: default@over1k_part4_0@s=bob hernandez +POSTHOOK: Input: default@over1k_part4_0@s=bob ichabod +POSTHOOK: Input: default@over1k_part4_0@s=bob king +POSTHOOK: Input: default@over1k_part4_0@s=bob laertes +POSTHOOK: Input: default@over1k_part4_0@s=bob miller +POSTHOOK: Input: default@over1k_part4_0@s=bob ovid +POSTHOOK: Input: default@over1k_part4_0@s=bob polk +POSTHOOK: Input: default@over1k_part4_0@s=bob quirinius +POSTHOOK: Input: default@over1k_part4_0@s=bob steinbeck +POSTHOOK: Input: default@over1k_part4_0@s=bob van buren +POSTHOOK: Input: default@over1k_part4_0@s=bob white +POSTHOOK: Input: default@over1k_part4_0@s=bob xylophone +POSTHOOK: Input: default@over1k_part4_0@s=bob young +POSTHOOK: Input: default@over1k_part4_0@s=bob zipper +#### A masked pattern was here #### +41 +PREHOOK: query: select count(1) from over1k_part4_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@over1k_part4_1 +PREHOOK: Input: default@over1k_part4_1@s=bob brown +PREHOOK: Input: default@over1k_part4_1@s=bob carson +PREHOOK: Input: default@over1k_part4_1@s=bob davidson +PREHOOK: Input: default@over1k_part4_1@s=bob ellison +PREHOOK: Input: default@over1k_part4_1@s=bob falkner +PREHOOK: Input: default@over1k_part4_1@s=bob garcia +PREHOOK: Input: default@over1k_part4_1@s=bob hernandez +PREHOOK: Input: default@over1k_part4_1@s=bob ichabod +PREHOOK: Input: default@over1k_part4_1@s=bob king +PREHOOK: Input: default@over1k_part4_1@s=bob laertes +PREHOOK: Input: default@over1k_part4_1@s=bob miller +PREHOOK: Input: default@over1k_part4_1@s=bob ovid +PREHOOK: Input: default@over1k_part4_1@s=bob polk +PREHOOK: Input: default@over1k_part4_1@s=bob quirinius +PREHOOK: Input: default@over1k_part4_1@s=bob steinbeck +PREHOOK: Input: default@over1k_part4_1@s=bob van buren +PREHOOK: Input: default@over1k_part4_1@s=bob white +PREHOOK: Input: default@over1k_part4_1@s=bob xylophone +PREHOOK: Input: default@over1k_part4_1@s=bob young +PREHOOK: Input: default@over1k_part4_1@s=bob zipper +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from over1k_part4_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over1k_part4_1 +POSTHOOK: Input: default@over1k_part4_1@s=bob brown +POSTHOOK: Input: default@over1k_part4_1@s=bob carson +POSTHOOK: Input: default@over1k_part4_1@s=bob davidson +POSTHOOK: Input: default@over1k_part4_1@s=bob ellison +POSTHOOK: Input: default@over1k_part4_1@s=bob falkner +POSTHOOK: Input: default@over1k_part4_1@s=bob garcia +POSTHOOK: Input: default@over1k_part4_1@s=bob hernandez +POSTHOOK: Input: default@over1k_part4_1@s=bob ichabod +POSTHOOK: Input: default@over1k_part4_1@s=bob king +POSTHOOK: Input: default@over1k_part4_1@s=bob laertes +POSTHOOK: Input: default@over1k_part4_1@s=bob miller +POSTHOOK: Input: default@over1k_part4_1@s=bob ovid +POSTHOOK: Input: default@over1k_part4_1@s=bob polk +POSTHOOK: Input: default@over1k_part4_1@s=bob quirinius +POSTHOOK: Input: default@over1k_part4_1@s=bob steinbeck +POSTHOOK: Input: default@over1k_part4_1@s=bob van buren +POSTHOOK: Input: default@over1k_part4_1@s=bob white +POSTHOOK: Input: default@over1k_part4_1@s=bob xylophone +POSTHOOK: Input: default@over1k_part4_1@s=bob young +POSTHOOK: Input: default@over1k_part4_1@s=bob zipper +#### A masked pattern was here #### +41 PREHOOK: query: drop table over1k_n3 PREHOOK: type: DROPTABLE PREHOOK: Input: default@over1k_n3 diff --git a/ql/src/test/results/clientpositive/llap/llap_partitioned.q.out b/ql/src/test/results/clientpositive/llap/llap_partitioned.q.out index 18dca4921f..7e6e88bd08 100644 --- a/ql/src/test/results/clientpositive/llap/llap_partitioned.q.out +++ b/ql/src/test/results/clientpositive/llap/llap_partitioned.q.out @@ -1916,7 +1916,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: oft - Statistics: Num rows: 12288 Data size: 13243096 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 12288 Data size: 1884148 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:csmallint:smallint, 1:cint:int, 2:cbigint:bigint, 3:cfloat:float, 4:cdouble:double, 5:cstring1:string, 6:cchar1:char(255), 7:cvchar1:varchar(255), 8:cboolean1:boolean, 9:cboolean2:boolean, 10:ctinyint:tinyint, 11:ROW__ID:struct] @@ -1939,7 +1939,7 @@ STAGE PLANS: outputColumnNames: _col1, _col6, _col7, _col10 input vertices: 1 Map 2 - Statistics: Num rows: 960 Data size: 240494 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 960 Data size: 4040 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col10 (type: tinyint), _col1 (type: int), _col6 (type: char(255)), _col7 (type: varchar(255)) outputColumnNames: _col0, _col1, _col2, _col3 @@ -1947,13 +1947,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [10, 1, 6, 7] - Statistics: Num rows: 960 Data size: 240494 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 960 Data size: 4040 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 960 Data size: 240494 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 960 Data size: 4040 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/llap_stats.q.out b/ql/src/test/results/clientpositive/llap/llap_stats.q.out index 89c0c57642..5bba6b0a37 100644 --- a/ql/src/test/results/clientpositive/llap/llap_stats.q.out +++ b/ql/src/test/results/clientpositive/llap/llap_stats.q.out @@ -153,11 +153,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: llap_stats - Statistics: Num rows: 10 Data size: 196 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int) outputColumnNames: ctinyint, csmallint, cint - Statistics: Num rows: 10 Data size: 196 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: compute_stats(ctinyint, 'hll'), compute_stats(csmallint, 'hll') keys: cint (type: int) diff --git a/ql/src/test/results/clientpositive/llap/load_dyn_part5.q.out b/ql/src/test/results/clientpositive/llap/load_dyn_part5.q.out index 9255be7dce..9e0c053cd9 100644 --- a/ql/src/test/results/clientpositive/llap/load_dyn_part5.q.out +++ b/ql/src/test/results/clientpositive/llap/load_dyn_part5.q.out @@ -41,6 +41,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -52,14 +53,50 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll') + keys: value (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 132750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 132750 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct) Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) value expressions: _col0 (type: string) - Execution mode: vectorized, llap + Execution mode: llap LLAP IO: no inputs Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 132750 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 132750 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 132750 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator diff --git a/ql/src/test/results/clientpositive/llap/orc_merge10.q.out b/ql/src/test/results/clientpositive/llap/orc_merge10.q.out index d943b40374..c03d7b8e37 100644 --- a/ql/src/test/results/clientpositive/llap/orc_merge10.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_merge10.q.out @@ -65,6 +65,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -76,6 +77,22 @@ STAGE PLANS: expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), CAST( _col2 AS STRING) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct) Reduce Output Operator key expressions: _col2 (type: int) sort order: + @@ -84,6 +101,26 @@ STAGE PLANS: Execution mode: llap LLAP IO: no inputs Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 Execution mode: llap Reduce Operator Tree: Select Operator @@ -172,6 +209,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -183,6 +221,22 @@ STAGE PLANS: expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), CAST( _col2 AS STRING) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct) Reduce Output Operator key expressions: _col2 (type: int) sort order: + @@ -191,6 +245,26 @@ STAGE PLANS: Execution mode: llap LLAP IO: no inputs Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 Execution mode: llap Reduce Operator Tree: Select Operator @@ -324,6 +398,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -335,6 +410,22 @@ STAGE PLANS: expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), CAST( _col2 AS STRING) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct) Reduce Output Operator key expressions: _col2 (type: int) sort order: + @@ -343,6 +434,26 @@ STAGE PLANS: Execution mode: llap LLAP IO: no inputs Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 Execution mode: llap Reduce Operator Tree: Select Operator diff --git a/ql/src/test/results/clientpositive/llap/orc_merge2.q.out b/ql/src/test/results/clientpositive/llap/orc_merge2.q.out index 7265292d09..ea6706c24f 100644 --- a/ql/src/test/results/clientpositive/llap/orc_merge2.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_merge2.q.out @@ -39,6 +39,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -50,6 +51,22 @@ STAGE PLANS: expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 10) (type: int), (hash(value) pmod 10) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 500 Data size: 51500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), CAST( _col2 AS STRING) (type: string), CAST( _col3 AS STRING) (type: string) + outputColumnNames: key, value, one, two, three + Statistics: Num rows: 500 Data size: 274000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: one (type: string), two (type: string), three (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 500 Data size: 658500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 500 Data size: 658500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col3 (type: struct), _col4 (type: struct) Reduce Output Operator key expressions: _col2 (type: int), _col3 (type: int) sort order: ++ @@ -58,6 +75,26 @@ STAGE PLANS: Execution mode: llap LLAP IO: no inputs Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 500 Data size: 666500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col3 (type: struct), _col4 (type: struct), _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 500 Data size: 666500 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 666500 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 Execution mode: llap Reduce Operator Tree: Select Operator diff --git a/ql/src/test/results/clientpositive/llap/orc_merge7.q.out b/ql/src/test/results/clientpositive/llap/orc_merge7.q.out index 634f1399bf..583ee9670a 100644 --- a/ql/src/test/results/clientpositive/llap/orc_merge7.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_merge7.q.out @@ -41,6 +41,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -52,6 +53,22 @@ STAGE PLANS: expressions: userid (type: bigint), string1 (type: string), subtype (type: double), decimal1 (type: decimal(38,0)), ts (type: timestamp), subtype (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(38,0)), _col4 (type: timestamp), _col5 (type: double) + outputColumnNames: userid, string1, subtype, decimal1, ts, st + Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 'hll'), compute_stats(string1, 'hll'), compute_stats(subtype, 'hll'), compute_stats(decimal1, 'hll'), compute_stats(ts, 'hll') + keys: st (type: double) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) Reduce Output Operator key expressions: _col5 (type: double) sort order: + @@ -60,6 +77,26 @@ STAGE PLANS: Execution mode: llap LLAP IO: all inputs Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + keys: KEY._col0 (type: double) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 Execution mode: llap Reduce Operator Tree: Select Operator @@ -237,6 +274,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -248,6 +286,22 @@ STAGE PLANS: expressions: userid (type: bigint), string1 (type: string), subtype (type: double), decimal1 (type: decimal(38,0)), ts (type: timestamp), subtype (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(38,0)), _col4 (type: timestamp), _col5 (type: double) + outputColumnNames: userid, string1, subtype, decimal1, ts, st + Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 'hll'), compute_stats(string1, 'hll'), compute_stats(subtype, 'hll'), compute_stats(decimal1, 'hll'), compute_stats(ts, 'hll') + keys: st (type: double) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) Reduce Output Operator key expressions: _col5 (type: double) sort order: + @@ -256,6 +310,26 @@ STAGE PLANS: Execution mode: llap LLAP IO: all inputs Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + keys: KEY._col0 (type: double) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 Execution mode: llap Reduce Operator Tree: Select Operator diff --git a/ql/src/test/results/clientpositive/llap/orc_merge_diff_fs.q.out b/ql/src/test/results/clientpositive/llap/orc_merge_diff_fs.q.out index a912216dc5..3bd001a9fd 100644 --- a/ql/src/test/results/clientpositive/llap/orc_merge_diff_fs.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_merge_diff_fs.q.out @@ -65,6 +65,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -76,6 +77,22 @@ STAGE PLANS: expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), CAST( _col2 AS STRING) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct) Reduce Output Operator key expressions: _col2 (type: int) sort order: + @@ -84,6 +101,26 @@ STAGE PLANS: Execution mode: llap LLAP IO: no inputs Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 Execution mode: llap Reduce Operator Tree: Select Operator @@ -172,6 +209,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -183,6 +221,22 @@ STAGE PLANS: expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), CAST( _col2 AS STRING) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct) Reduce Output Operator key expressions: _col2 (type: int) sort order: + @@ -191,6 +245,26 @@ STAGE PLANS: Execution mode: llap LLAP IO: no inputs Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 Execution mode: llap Reduce Operator Tree: Select Operator @@ -324,6 +398,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -335,6 +410,22 @@ STAGE PLANS: expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), CAST( _col2 AS STRING) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct) Reduce Output Operator key expressions: _col2 (type: int) sort order: + @@ -343,6 +434,26 @@ STAGE PLANS: Execution mode: llap LLAP IO: no inputs Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 Execution mode: llap Reduce Operator Tree: Select Operator diff --git a/ql/src/test/results/clientpositive/llap/orc_merge_incompat2.q.out b/ql/src/test/results/clientpositive/llap/orc_merge_incompat2.q.out index ee13fafc61..708652b491 100644 --- a/ql/src/test/results/clientpositive/llap/orc_merge_incompat2.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_merge_incompat2.q.out @@ -41,6 +41,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -52,6 +53,22 @@ STAGE PLANS: expressions: userid (type: bigint), string1 (type: string), subtype (type: double), decimal1 (type: decimal(38,0)), ts (type: timestamp), subtype (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(38,0)), _col4 (type: timestamp), _col5 (type: double) + outputColumnNames: userid, string1, subtype, decimal1, ts, st + Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 'hll'), compute_stats(string1, 'hll'), compute_stats(subtype, 'hll'), compute_stats(decimal1, 'hll'), compute_stats(ts, 'hll') + keys: st (type: double) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) Reduce Output Operator key expressions: _col5 (type: double) sort order: + @@ -60,6 +77,26 @@ STAGE PLANS: Execution mode: llap LLAP IO: all inputs Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + keys: KEY._col0 (type: double) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 Execution mode: llap Reduce Operator Tree: Select Operator diff --git a/ql/src/test/results/clientpositive/llap/tez_dml.q.out b/ql/src/test/results/clientpositive/llap/tez_dml.q.out index a9b3172777..ef30b307aa 100644 --- a/ql/src/test/results/clientpositive/llap/tez_dml.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_dml.q.out @@ -455,6 +455,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -466,14 +467,50 @@ STAGE PLANS: expressions: value (type: string), cnt (type: bigint) outputColumnNames: _col0, _col1 Statistics: Num rows: 309 Data size: 56640 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) + outputColumnNames: c, d + Statistics: Num rows: 309 Data size: 56640 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c, 'hll') + keys: d (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 309 Data size: 56640 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 309 Data size: 56640 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct) Reduce Output Operator key expressions: _col1 (type: bigint) sort order: + Map-reduce partition columns: _col1 (type: bigint) value expressions: _col0 (type: string) - Execution mode: vectorized, llap + Execution mode: llap LLAP IO: no inputs Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 154 Data size: 28228 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 154 Data size: 28228 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 154 Data size: 28228 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator diff --git a/ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out b/ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out index 28977d78f4..ca8232e33d 100644 --- a/ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out @@ -1316,7 +1316,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: web_sales - Statistics: Num rows: 2000 Data size: 3511604 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 8000 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -1326,7 +1326,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [16] - Statistics: Num rows: 2000 Data size: 3511604 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 8000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: className: VectorGroupByOperator @@ -1338,7 +1338,7 @@ STAGE PLANS: keys: ws_order_number (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 2000 Data size: 3511604 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 170 Data size: 680 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -1347,7 +1347,7 @@ STAGE PLANS: className: VectorReduceSinkLongOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 2000 Data size: 3511604 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 170 Data size: 680 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -1379,7 +1379,7 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 1755802 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 170 Data size: 680 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col0) Group By Vectorization: @@ -1391,14 +1391,14 @@ STAGE PLANS: projectedOutputColumnNums: [0] mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap @@ -1420,13 +1420,13 @@ STAGE PLANS: projectedOutputColumnNums: [0] mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out b/ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out index 2ab11daa46..b7db02802f 100644 --- a/ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out @@ -979,7 +979,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: flights_tiny_orc_partitioned_date - Statistics: Num rows: 137 Data size: 94360 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -989,13 +989,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] - Statistics: Num rows: 137 Data size: 62584 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 137 Data size: 62584 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1242,7 +1242,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: flights_tiny_orc_partitioned_date - Statistics: Num rows: 137 Data size: 94360 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -1252,7 +1252,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] - Statistics: Num rows: 137 Data size: 94360 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col4 (type: int), _col5 (type: date) sort order: ++ @@ -1260,7 +1260,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 137 Data size: 94360 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: timestamp), _col3 (type: float) Execution mode: vectorized, llap @@ -1290,13 +1290,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [2, 3, 4, 5, 0, 1] - Statistics: Num rows: 137 Data size: 62584 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 25 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 25 Data size: 11800 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col4 (type: int), _col5 (type: date) sort order: ++ @@ -1304,7 +1304,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 25 Data size: 11800 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: timestamp), _col3 (type: float) Reducer 3 @@ -1323,19 +1323,19 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [2, 3, 4, 5, 0, 1] - Statistics: Num rows: 25 Data size: 11800 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 25 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 25 Data size: 11800 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 25 Data size: 11800 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2018,7 +2018,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: flights_tiny_orc_partitioned_timestamp - Statistics: Num rows: 137 Data size: 96472 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -2028,13 +2028,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] - Statistics: Num rows: 137 Data size: 62504 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 137 Data size: 62504 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2281,7 +2281,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: flights_tiny_orc_partitioned_timestamp - Statistics: Num rows: 137 Data size: 96472 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -2291,7 +2291,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] - Statistics: Num rows: 137 Data size: 96472 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col4 (type: int), _col5 (type: timestamp) sort order: ++ @@ -2299,7 +2299,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 137 Data size: 96472 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: date), _col3 (type: float) Execution mode: vectorized, llap @@ -2329,13 +2329,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [2, 3, 4, 5, 0, 1] - Statistics: Num rows: 137 Data size: 62504 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 25 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 25 Data size: 11800 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col4 (type: int), _col5 (type: timestamp) sort order: ++ @@ -2343,7 +2343,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 25 Data size: 11800 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: date), _col3 (type: float) Reducer 3 @@ -2362,19 +2362,19 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [2, 3, 4, 5, 0, 1] - Statistics: Num rows: 25 Data size: 11800 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 25 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 25 Data size: 11800 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 25 Data size: 11800 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3580,7 +3580,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: flights_tiny_parquet_partitioned_date - Statistics: Num rows: 137 Data size: 63269 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -3590,13 +3590,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] - Statistics: Num rows: 137 Data size: 62584 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 137 Data size: 62584 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3843,7 +3843,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: flights_tiny_parquet_partitioned_date - Statistics: Num rows: 137 Data size: 63269 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -3853,7 +3853,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] - Statistics: Num rows: 137 Data size: 63269 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col4 (type: int), _col5 (type: date) sort order: ++ @@ -3861,7 +3861,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 137 Data size: 63269 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: timestamp), _col3 (type: float) Execution mode: vectorized, llap @@ -3891,13 +3891,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [2, 3, 4, 5, 0, 1] - Statistics: Num rows: 137 Data size: 62584 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 25 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 25 Data size: 11800 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col4 (type: int), _col5 (type: date) sort order: ++ @@ -3905,7 +3905,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 25 Data size: 11800 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: timestamp), _col3 (type: float) Reducer 3 @@ -3924,19 +3924,19 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [2, 3, 4, 5, 0, 1] - Statistics: Num rows: 25 Data size: 11800 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 25 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 25 Data size: 11800 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 25 Data size: 11800 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4619,7 +4619,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: flights_tiny_parquet_partitioned_timestamp - Statistics: Num rows: 137 Data size: 63189 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -4629,13 +4629,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] - Statistics: Num rows: 137 Data size: 62504 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 137 Data size: 62504 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4882,7 +4882,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: flights_tiny_parquet_partitioned_timestamp - Statistics: Num rows: 137 Data size: 63189 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -4892,7 +4892,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] - Statistics: Num rows: 137 Data size: 63189 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col4 (type: int), _col5 (type: timestamp) sort order: ++ @@ -4900,7 +4900,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 137 Data size: 63189 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: date), _col3 (type: float) Execution mode: vectorized, llap @@ -4930,13 +4930,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [2, 3, 4, 5, 0, 1] - Statistics: Num rows: 137 Data size: 62504 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 25 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 25 Data size: 11800 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col4 (type: int), _col5 (type: timestamp) sort order: ++ @@ -4944,7 +4944,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 25 Data size: 11800 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: date), _col3 (type: float) Reducer 3 @@ -4963,19 +4963,19 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [2, 3, 4, 5, 0, 1] - Statistics: Num rows: 25 Data size: 11800 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 25 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 25 Data size: 11800 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 25 Data size: 11800 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/load_dyn_part1.q.out b/ql/src/test/results/clientpositive/load_dyn_part1.q.out index ae0779cb01..6b48d8dab6 100644 --- a/ql/src/test/results/clientpositive/load_dyn_part1.q.out +++ b/ql/src/test/results/clientpositive/load_dyn_part1.q.out @@ -65,11 +65,13 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0, Stage-5 + Stage-5 depends on stages: Stage-2 + Stage-7 depends on stages: Stage-1, Stage-5 + Stage-6 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-2 @@ -85,11 +87,28 @@ STAGE PLANS: expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col3 (type: string) - sort order: ++ - Map-reduce partition columns: _col2 (type: string), _col3 (type: string) - value expressions: _col0 (type: string), _col1 (type: string) + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (ds > '2008-04-08') (type: boolean) Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE @@ -97,12 +116,56 @@ STAGE PLANS: expressions: key (type: string), value (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, hr + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: '2008-12-31' (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe File Output Operator compressed: false table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col3 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string), _col3 (type: string) + value expressions: _col0 (type: string), _col1 (type: string) Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -131,7 +194,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part1_n0 - Stage: Stage-3 + Stage: Stage-4 Stats Work Basic Stats Work: Column Stats Desc: @@ -139,7 +202,45 @@ STAGE PLANS: Column Types: string, string Table: default.nzhang_part1_n0 - Stage: Stage-4 + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: '2008-12-31' (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: '2008-12-31' (type: string), _col1 (type: string) + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: '2008-12-31' (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), '2008-12-31' (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-7 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part2_n0 + + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -176,14 +277,6 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part2_n0 - Stage: Stage-5 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key, value - Column Types: string, string - Table: default.nzhang_part2_n0 - PREHOOK: query: from srcpart insert overwrite table nzhang_part1_n0 partition (ds, hr) select key, value, ds, hr where ds <= '2008-04-08' insert overwrite table nzhang_part2_n0 partition(ds='2008-12-31', hr) select key, value, hr where ds > '2008-04-08' diff --git a/ql/src/test/results/clientpositive/load_dyn_part10.q.out b/ql/src/test/results/clientpositive/load_dyn_part10.q.out index c6d9b6c4ff..a5b75bc5b7 100644 --- a/ql/src/test/results/clientpositive/load_dyn_part10.q.out +++ b/ql/src/test/results/clientpositive/load_dyn_part10.q.out @@ -50,8 +50,9 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 @@ -65,11 +66,56 @@ STAGE PLANS: expressions: key (type: string), value (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string) - sort order: + - Map-reduce partition columns: _col2 (type: string) - value expressions: _col0 (type: string), _col1 (type: string) + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-12-31' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) + value expressions: _col0 (type: string), _col1 (type: string) Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -98,7 +144,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part10 - Stage: Stage-2 + Stage: Stage-3 Stats Work Basic Stats Work: Column Stats Desc: diff --git a/ql/src/test/results/clientpositive/load_dyn_part3.q.out b/ql/src/test/results/clientpositive/load_dyn_part3.q.out index ebf3c52f93..440e39b197 100644 --- a/ql/src/test/results/clientpositive/load_dyn_part3.q.out +++ b/ql/src/test/results/clientpositive/load_dyn_part3.q.out @@ -52,8 +52,9 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 @@ -67,11 +68,56 @@ STAGE PLANS: expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col3 (type: string) - sort order: ++ - Map-reduce partition columns: _col2 (type: string), _col3 (type: string) - value expressions: _col0 (type: string), _col1 (type: string) + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col3 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string), _col3 (type: string) + value expressions: _col0 (type: string), _col1 (type: string) Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -100,7 +146,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part3 - Stage: Stage-2 + Stage: Stage-3 Stats Work Basic Stats Work: Column Stats Desc: diff --git a/ql/src/test/results/clientpositive/load_dyn_part4.q.out b/ql/src/test/results/clientpositive/load_dyn_part4.q.out index e30d6aec51..f102e0e2c4 100644 --- a/ql/src/test/results/clientpositive/load_dyn_part4.q.out +++ b/ql/src/test/results/clientpositive/load_dyn_part4.q.out @@ -62,8 +62,9 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 @@ -77,11 +78,56 @@ STAGE PLANS: expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col3 (type: string) - sort order: ++ - Map-reduce partition columns: _col2 (type: string), _col3 (type: string) - value expressions: _col0 (type: string), _col1 (type: string) + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col3 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string), _col3 (type: string) + value expressions: _col0 (type: string), _col1 (type: string) Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -110,7 +156,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part4 - Stage: Stage-2 + Stage: Stage-3 Stats Work Basic Stats Work: Column Stats Desc: diff --git a/ql/src/test/results/clientpositive/load_dyn_part8.q.out b/ql/src/test/results/clientpositive/load_dyn_part8.q.out index 1037a339d1..86c29103c0 100644 --- a/ql/src/test/results/clientpositive/load_dyn_part8.q.out +++ b/ql/src/test/results/clientpositive/load_dyn_part8.q.out @@ -57,11 +57,13 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-2 + Stage-7 depends on stages: Stage-1, Stage-5 + Stage-6 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-2 @@ -79,14 +81,43 @@ STAGE PLANS: expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col3 (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: _col2 (type: string), _col3 (type: string) - tag: -1 - value expressions: _col0 (type: string), _col1 (type: string) - auto parallelism: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types string,string,string,string + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Filter Operator isSamplingPred: false predicate: (ds > '2008-04-08') (type: boolean) @@ -95,6 +126,34 @@ STAGE PLANS: expressions: key (type: string), value (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, hr + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: '2008-12-31' (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types string,string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false File Output Operator compressed: false GlobalTableId: 0 @@ -113,7 +172,6 @@ STAGE PLANS: TotalFiles: 1 GatherStats: false MultiFileSpray: false - Execution mode: vectorized Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -319,6 +377,82 @@ STAGE PLANS: /srcpart/ds=2008-04-09/hr=11 [srcpart] /srcpart/ds=2008-04-09/hr=12 [srcpart] Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + key expressions: _col2 (type: string), _col3 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col2 (type: string), _col3 (type: string) + tag: -1 + value expressions: _col0 (type: string), _col1 (type: string) + auto parallelism: false + Execution mode: vectorized + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10004 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types string,string,string,string + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types string,string,string,string + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string) @@ -382,12 +516,99 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part8_n0 - Stage: Stage-3 + Stage: Stage-4 Stats Work Basic Stats Work: #### A masked pattern was here #### - Stage: Stage-4 + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + key expressions: '2008-12-31' (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: '2008-12-31' (type: string), _col1 (type: string) + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false + Execution mode: vectorized + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10005 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types string,string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types string,string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: '2008-12-31' (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), '2008-12-31' (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-7 + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part8_n0 + Is Table Level Stats: false + + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -406,7 +627,7 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: -mr-10004 + base file name: -mr-10006 input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: @@ -493,16 +714,6 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part8_n0 - Stage: Stage-5 - Stats Work - Basic Stats Work: -#### A masked pattern was here #### - Column Stats Desc: - Columns: key, value - Column Types: string, string - Table: default.nzhang_part8_n0 - Is Table Level Stats: false - PREHOOK: query: from srcpart insert overwrite table nzhang_part8_n0 partition (ds, hr) select key, value, ds, hr where ds <= '2008-04-08' insert overwrite table nzhang_part8_n0 partition(ds='2008-12-31', hr) select key, value, hr where ds > '2008-04-08' diff --git a/ql/src/test/results/clientpositive/load_dyn_part9.q.out b/ql/src/test/results/clientpositive/load_dyn_part9.q.out index 4e272bbe6b..af34de5e38 100644 --- a/ql/src/test/results/clientpositive/load_dyn_part9.q.out +++ b/ql/src/test/results/clientpositive/load_dyn_part9.q.out @@ -50,8 +50,9 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 @@ -65,11 +66,56 @@ STAGE PLANS: expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col3 (type: string) - sort order: ++ - Map-reduce partition columns: _col2 (type: string), _col3 (type: string) - value expressions: _col0 (type: string), _col1 (type: string) + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col3 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string), _col3 (type: string) + value expressions: _col0 (type: string), _col1 (type: string) Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -98,7 +144,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part9 - Stage: Stage-2 + Stage: Stage-3 Stats Work Basic Stats Work: Column Stats Desc: diff --git a/ql/src/test/results/clientpositive/merge3.q.out b/ql/src/test/results/clientpositive/merge3.q.out index 0b4da94db1..fc115bdff5 100644 --- a/ql/src/test/results/clientpositive/merge3.q.out +++ b/ql/src/test/results/clientpositive/merge3.q.out @@ -2394,13 +2394,14 @@ FROM `default`.`merge_src_part` WHERE `ds` IS NOT NULL STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 + Stage-2 depends on stages: Stage-1 + Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 Stage-5 - Stage-6 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-3 depends on stages: Stage-0 + Stage-4 + Stage-6 + Stage-7 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-1 @@ -2415,15 +2416,43 @@ STAGE PLANS: expressions: key (type: string), value (type: string), ds (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col2 (type: string) - tag: -1 - value expressions: _col0 (type: string), _col1 (type: string) - auto parallelism: false - Execution mode: vectorized + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types string,string,string + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2527,6 +2556,82 @@ STAGE PLANS: /merge_src_part/ds=2008-04-08 [merge_src_part] /merge_src_part/ds=2008-04-09 [merge_src_part] Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + key expressions: _col2 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col2 (type: string) + tag: -1 + value expressions: _col0 (type: string), _col1 (type: string) + auto parallelism: false + Execution mode: vectorized + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types string,string,string + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types string,string,string + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY._col2 (type: string) @@ -2562,10 +2667,10 @@ STAGE PLANS: GatherStats: true MultiFileSpray: false - Stage: Stage-7 + Stage: Stage-8 Conditional Operator - Stage: Stage-4 + Stage: Stage-5 Move Operator files: hdfs directory: true @@ -2598,7 +2703,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.merge_src_part2 - Stage: Stage-2 + Stage: Stage-3 Stats Work Basic Stats Work: #### A masked pattern was here #### @@ -2608,7 +2713,7 @@ STAGE PLANS: Table: default.merge_src_part2 Is Table Level Stats: false - Stage: Stage-3 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -2645,7 +2750,7 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: -ext-10002 + base file name: -ext-10003 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -2686,7 +2791,7 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### - Stage: Stage-5 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -2723,7 +2828,7 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: -ext-10002 + base file name: -ext-10003 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -2764,7 +2869,7 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### - Stage: Stage-6 + Stage: Stage-7 Move Operator files: hdfs directory: true diff --git a/ql/src/test/results/clientpositive/merge4.q.out b/ql/src/test/results/clientpositive/merge4.q.out index fcc557a5ae..621601d2bf 100644 --- a/ql/src/test/results/clientpositive/merge4.q.out +++ b/ql/src/test/results/clientpositive/merge4.q.out @@ -21,13 +21,14 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 + Stage-2 depends on stages: Stage-1 + Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 Stage-5 - Stage-6 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-3 depends on stages: Stage-0 + Stage-4 + Stage-6 + Stage-7 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-1 @@ -41,11 +42,56 @@ STAGE PLANS: expressions: key (type: string), value (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string) - sort order: + - Map-reduce partition columns: _col2 (type: string) - value expressions: _col0 (type: string), _col1 (type: string) + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2010-08-15' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) + value expressions: _col0 (type: string), _col1 (type: string) Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -61,10 +107,10 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part - Stage: Stage-7 + Stage: Stage-8 Conditional Operator - Stage: Stage-4 + Stage: Stage-5 Move Operator files: hdfs directory: true @@ -83,7 +129,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part - Stage: Stage-2 + Stage: Stage-3 Stats Work Basic Stats Work: Column Stats Desc: @@ -91,7 +137,7 @@ STAGE PLANS: Column Types: string, string Table: default.nzhang_part - Stage: Stage-3 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -103,7 +149,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part - Stage: Stage-5 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -115,7 +161,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part - Stage: Stage-6 + Stage: Stage-7 Move Operator files: hdfs directory: true diff --git a/ql/src/test/results/clientpositive/merge_dynamic_partition.q.out b/ql/src/test/results/clientpositive/merge_dynamic_partition.q.out index 6e75c6e062..c17d2c3a14 100644 --- a/ql/src/test/results/clientpositive/merge_dynamic_partition.q.out +++ b/ql/src/test/results/clientpositive/merge_dynamic_partition.q.out @@ -60,8 +60,9 @@ POSTHOOK: Input: default@srcpart_merge_dp_n1 POSTHOOK: Input: default@srcpart_merge_dp_n1@ds=2008-04-08/hr=11 STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 @@ -75,11 +76,56 @@ STAGE PLANS: expressions: key (type: string), value (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 99 Data size: 58120 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string) - sort order: + - Map-reduce partition columns: _col2 (type: string) - value expressions: _col0 (type: string), _col1 (type: string) + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 99 Data size: 58120 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 99 Data size: 58120 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 99 Data size: 58120 Basic stats: PARTIAL Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 49 Data size: 28766 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 49 Data size: 28766 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 49 Data size: 28766 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY._col2 (type: string) @@ -107,7 +153,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.merge_dynamic_part_n1 - Stage: Stage-2 + Stage: Stage-3 Stats Work Basic Stats Work: Column Stats Desc: @@ -1343,13 +1389,14 @@ POSTHOOK: Input: default@srcpart_merge_dp_n1 POSTHOOK: Input: default@srcpart_merge_dp_n1@ds=2008-04-08/hr=11 STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 + Stage-2 depends on stages: Stage-1 + Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 Stage-5 - Stage-6 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-3 depends on stages: Stage-0 + Stage-4 + Stage-6 + Stage-7 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-1 @@ -1363,11 +1410,56 @@ STAGE PLANS: expressions: key (type: string), value (type: string), '2008-04-08' (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 99 Data size: 58120 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col3 (type: string) - sort order: ++ - Map-reduce partition columns: _col2 (type: string), _col3 (type: string) - value expressions: _col0 (type: string), _col1 (type: string) + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 99 Data size: 58120 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 99 Data size: 58120 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 99 Data size: 58120 Basic stats: PARTIAL Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 49 Data size: 28766 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 49 Data size: 28766 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 49 Data size: 28766 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col3 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string), _col3 (type: string) + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string) @@ -1382,10 +1474,10 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.merge_dynamic_part_n1 - Stage: Stage-7 + Stage: Stage-8 Conditional Operator - Stage: Stage-4 + Stage: Stage-5 Move Operator files: hdfs directory: true @@ -1404,7 +1496,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.merge_dynamic_part_n1 - Stage: Stage-2 + Stage: Stage-3 Stats Work Basic Stats Work: Column Stats Desc: @@ -1412,7 +1504,7 @@ STAGE PLANS: Column Types: string, string Table: default.merge_dynamic_part_n1 - Stage: Stage-3 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -1424,7 +1516,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.merge_dynamic_part_n1 - Stage: Stage-5 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -1436,7 +1528,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.merge_dynamic_part_n1 - Stage: Stage-6 + Stage: Stage-7 Move Operator files: hdfs directory: true diff --git a/ql/src/test/results/clientpositive/merge_dynamic_partition2.q.out b/ql/src/test/results/clientpositive/merge_dynamic_partition2.q.out index a98bfbffec..4e85457e44 100644 --- a/ql/src/test/results/clientpositive/merge_dynamic_partition2.q.out +++ b/ql/src/test/results/clientpositive/merge_dynamic_partition2.q.out @@ -79,13 +79,14 @@ POSTHOOK: Input: default@srcpart_merge_dp_n0@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart_merge_dp_n0@ds=2008-04-08/hr=12 STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 + Stage-2 depends on stages: Stage-1 + Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 Stage-5 - Stage-6 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-3 depends on stages: Stage-0 + Stage-4 + Stage-6 + Stage-7 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-1 @@ -99,11 +100,56 @@ STAGE PLANS: expressions: key (type: string), value (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 297 Data size: 174150 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string) - sort order: + - Map-reduce partition columns: _col2 (type: string) - value expressions: _col0 (type: string), _col1 (type: string) + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 297 Data size: 174150 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 297 Data size: 174150 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 297 Data size: 174150 Basic stats: PARTIAL Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 148 Data size: 86781 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 148 Data size: 86781 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 148 Data size: 86781 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY._col2 (type: string) @@ -118,10 +164,10 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.merge_dynamic_part_n0 - Stage: Stage-7 + Stage: Stage-8 Conditional Operator - Stage: Stage-4 + Stage: Stage-5 Move Operator files: hdfs directory: true @@ -140,7 +186,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.merge_dynamic_part_n0 - Stage: Stage-2 + Stage: Stage-3 Stats Work Basic Stats Work: Column Stats Desc: @@ -148,7 +194,7 @@ STAGE PLANS: Column Types: string, string Table: default.merge_dynamic_part_n0 - Stage: Stage-3 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -160,7 +206,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.merge_dynamic_part_n0 - Stage: Stage-5 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -172,7 +218,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.merge_dynamic_part_n0 - Stage: Stage-6 + Stage: Stage-7 Move Operator files: hdfs directory: true diff --git a/ql/src/test/results/clientpositive/merge_dynamic_partition3.q.out b/ql/src/test/results/clientpositive/merge_dynamic_partition3.q.out index cbc8afd104..be25dd8905 100644 --- a/ql/src/test/results/clientpositive/merge_dynamic_partition3.q.out +++ b/ql/src/test/results/clientpositive/merge_dynamic_partition3.q.out @@ -143,13 +143,14 @@ POSTHOOK: Input: default@srcpart_merge_dp_n2@ds=2008-04-09/hr=11 POSTHOOK: Input: default@srcpart_merge_dp_n2@ds=2008-04-09/hr=12 STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 + Stage-2 depends on stages: Stage-1 + Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 Stage-5 - Stage-6 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-3 depends on stages: Stage-0 + Stage-4 + Stage-6 + Stage-7 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-1 @@ -163,11 +164,56 @@ STAGE PLANS: expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 594 Data size: 348300 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col3 (type: string) - sort order: ++ - Map-reduce partition columns: _col2 (type: string), _col3 (type: string) - value expressions: _col0 (type: string), _col1 (type: string) + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 594 Data size: 348300 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 594 Data size: 348300 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 594 Data size: 348300 Basic stats: PARTIAL Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 297 Data size: 174150 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 297 Data size: 174150 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 297 Data size: 174150 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col3 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string), _col3 (type: string) + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string) @@ -182,10 +228,10 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.merge_dynamic_part_n2 - Stage: Stage-7 + Stage: Stage-8 Conditional Operator - Stage: Stage-4 + Stage: Stage-5 Move Operator files: hdfs directory: true @@ -204,7 +250,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.merge_dynamic_part_n2 - Stage: Stage-2 + Stage: Stage-3 Stats Work Basic Stats Work: Column Stats Desc: @@ -212,7 +258,7 @@ STAGE PLANS: Column Types: string, string Table: default.merge_dynamic_part_n2 - Stage: Stage-3 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -224,7 +270,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.merge_dynamic_part_n2 - Stage: Stage-5 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -236,7 +282,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.merge_dynamic_part_n2 - Stage: Stage-6 + Stage: Stage-7 Move Operator files: hdfs directory: true diff --git a/ql/src/test/results/clientpositive/merge_dynamic_partition4.q.out b/ql/src/test/results/clientpositive/merge_dynamic_partition4.q.out index 8a4b96f38c..29ce8f4014 100644 --- a/ql/src/test/results/clientpositive/merge_dynamic_partition4.q.out +++ b/ql/src/test/results/clientpositive/merge_dynamic_partition4.q.out @@ -140,13 +140,14 @@ POSTHOOK: Input: default@srcpart_merge_dp_rc_n1@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart_merge_dp_rc_n1@ds=2008-04-08/hr=12 STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 + Stage-2 depends on stages: Stage-1 + Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 Stage-5 - Stage-6 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-3 depends on stages: Stage-0 + Stage-4 + Stage-6 + Stage-7 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-1 @@ -160,11 +161,56 @@ STAGE PLANS: expressions: key (type: string), value (type: string), if(((UDFToDouble(key) % 2.0D) = 0.0D), 'a1', 'b1') (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string) - sort order: + - Map-reduce partition columns: _col2 (type: string) - value expressions: _col0 (type: string), _col1 (type: string) + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY._col2 (type: string) @@ -179,10 +225,10 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.merge_dynamic_part_n3 - Stage: Stage-7 + Stage: Stage-8 Conditional Operator - Stage: Stage-4 + Stage: Stage-5 Move Operator files: hdfs directory: true @@ -201,7 +247,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.merge_dynamic_part_n3 - Stage: Stage-2 + Stage: Stage-3 Stats Work Basic Stats Work: Column Stats Desc: @@ -209,21 +255,21 @@ STAGE PLANS: Column Types: string, string Table: default.merge_dynamic_part_n3 - Stage: Stage-3 + Stage: Stage-4 Merge File Operator Map Operator Tree: RCFile Merge Operator merge level: block input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - Stage: Stage-5 + Stage: Stage-6 Merge File Operator Map Operator Tree: RCFile Merge Operator merge level: block input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - Stage: Stage-6 + Stage: Stage-7 Move Operator files: hdfs directory: true diff --git a/ql/src/test/results/clientpositive/merge_dynamic_partition5.q.out b/ql/src/test/results/clientpositive/merge_dynamic_partition5.q.out index db782cbd07..89265fd4a7 100644 --- a/ql/src/test/results/clientpositive/merge_dynamic_partition5.q.out +++ b/ql/src/test/results/clientpositive/merge_dynamic_partition5.q.out @@ -116,13 +116,14 @@ POSTHOOK: Input: default@srcpart_merge_dp_rc@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart_merge_dp_rc@ds=2008-04-08/hr=12 STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 + Stage-2 depends on stages: Stage-1 + Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 Stage-5 - Stage-6 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-3 depends on stages: Stage-0 + Stage-4 + Stage-6 + Stage-7 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-1 @@ -136,11 +137,56 @@ STAGE PLANS: expressions: key (type: string), value (type: string), if(((UDFToDouble(key) % 100.0D) = 0.0D), 'a1', 'b1') (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 618 Data size: 5934 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string) - sort order: + - Map-reduce partition columns: _col2 (type: string) - value expressions: _col0 (type: string), _col1 (type: string) + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 618 Data size: 5934 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 618 Data size: 5934 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 618 Data size: 5934 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 309 Data size: 2967 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 309 Data size: 2967 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 309 Data size: 2967 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) + value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY._col2 (type: string) @@ -155,10 +201,10 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.merge_dynamic_part - Stage: Stage-7 + Stage: Stage-8 Conditional Operator - Stage: Stage-4 + Stage: Stage-5 Move Operator files: hdfs directory: true @@ -177,7 +223,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.merge_dynamic_part - Stage: Stage-2 + Stage: Stage-3 Stats Work Basic Stats Work: Column Stats Desc: @@ -185,21 +231,21 @@ STAGE PLANS: Column Types: string, string Table: default.merge_dynamic_part - Stage: Stage-3 + Stage: Stage-4 Merge File Operator Map Operator Tree: RCFile Merge Operator merge level: block input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - Stage: Stage-5 + Stage: Stage-6 Merge File Operator Map Operator Tree: RCFile Merge Operator merge level: block input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - Stage: Stage-6 + Stage: Stage-7 Move Operator files: hdfs directory: true diff --git a/ql/src/test/results/clientpositive/orc_merge10.q.out b/ql/src/test/results/clientpositive/orc_merge10.q.out index 0184fa9c1b..3e0a3bd4eb 100644 --- a/ql/src/test/results/clientpositive/orc_merge10.q.out +++ b/ql/src/test/results/clientpositive/orc_merge10.q.out @@ -55,8 +55,9 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 @@ -69,11 +70,56 @@ STAGE PLANS: expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: int) - sort order: + - Map-reduce partition columns: _col2 (type: int) - value expressions: _col0 (type: int), _col1 (type: string) + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), CAST( _col2 AS STRING) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + value expressions: _col0 (type: int), _col1 (type: string) Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: int) @@ -101,7 +147,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1 - Stage: Stage-2 + Stage: Stage-3 Stats Work Basic Stats Work: Column Stats Desc: @@ -143,13 +189,14 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 + Stage-2 depends on stages: Stage-1 + Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 Stage-5 - Stage-6 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-3 depends on stages: Stage-0 + Stage-4 + Stage-6 + Stage-7 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-1 @@ -162,11 +209,56 @@ STAGE PLANS: expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: int) - sort order: + - Map-reduce partition columns: _col2 (type: int) - value expressions: _col0 (type: int), _col1 (type: string) + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), CAST( _col2 AS STRING) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + value expressions: _col0 (type: int), _col1 (type: string) Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: int) @@ -181,10 +273,10 @@ STAGE PLANS: serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1b - Stage: Stage-7 + Stage: Stage-8 Conditional Operator - Stage: Stage-4 + Stage: Stage-5 Move Operator files: hdfs directory: true @@ -203,7 +295,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1b - Stage: Stage-2 + Stage: Stage-3 Stats Work Basic Stats Work: Column Stats Desc: @@ -211,7 +303,7 @@ STAGE PLANS: Column Types: int, string Table: default.orcfile_merge1b - Stage: Stage-3 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -223,7 +315,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1b - Stage: Stage-5 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -235,7 +327,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1b - Stage: Stage-6 + Stage: Stage-7 Move Operator files: hdfs directory: true @@ -275,13 +367,14 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 + Stage-2 depends on stages: Stage-1 + Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 Stage-5 - Stage-6 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-3 depends on stages: Stage-0 + Stage-4 + Stage-6 + Stage-7 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-1 @@ -294,11 +387,56 @@ STAGE PLANS: expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: int) - sort order: + - Map-reduce partition columns: _col2 (type: int) - value expressions: _col0 (type: int), _col1 (type: string) + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), CAST( _col2 AS STRING) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + value expressions: _col0 (type: int), _col1 (type: string) Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: int) @@ -313,10 +451,10 @@ STAGE PLANS: serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1c - Stage: Stage-7 + Stage: Stage-8 Conditional Operator - Stage: Stage-4 + Stage: Stage-5 Move Operator files: hdfs directory: true @@ -335,7 +473,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1c - Stage: Stage-2 + Stage: Stage-3 Stats Work Basic Stats Work: Column Stats Desc: @@ -343,21 +481,21 @@ STAGE PLANS: Column Types: int, string Table: default.orcfile_merge1c - Stage: Stage-3 + Stage: Stage-4 Merge File Operator Map Operator Tree: ORC File Merge Operator merge level: stripe input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - Stage: Stage-5 + Stage: Stage-6 Merge File Operator Map Operator Tree: ORC File Merge Operator merge level: stripe input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - Stage: Stage-6 + Stage: Stage-7 Move Operator files: hdfs directory: true diff --git a/ql/src/test/results/clientpositive/orc_merge2.q.out b/ql/src/test/results/clientpositive/orc_merge2.q.out index b91d9812fc..7b58415853 100644 --- a/ql/src/test/results/clientpositive/orc_merge2.q.out +++ b/ql/src/test/results/clientpositive/orc_merge2.q.out @@ -29,8 +29,9 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 @@ -43,11 +44,56 @@ STAGE PLANS: expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 10) (type: int), (hash(value) pmod 10) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: int), _col3 (type: int) - sort order: ++ - Map-reduce partition columns: _col2 (type: int), _col3 (type: int) - value expressions: _col0 (type: int), _col1 (type: string) + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), CAST( _col2 AS STRING) (type: string), CAST( _col3 AS STRING) (type: string) + outputColumnNames: key, value, one, two, three + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: one (type: string), two (type: string), three (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: struct), _col4 (type: struct) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: struct), _col4 (type: struct), _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: int), _col3 (type: int) + sort order: ++ + Map-reduce partition columns: _col2 (type: int), _col3 (type: int) + value expressions: _col0 (type: int), _col1 (type: string) Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: int), KEY._col3 (type: int) @@ -76,7 +122,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge2a_n0 - Stage: Stage-2 + Stage: Stage-3 Stats Work Basic Stats Work: Column Stats Desc: diff --git a/ql/src/test/results/clientpositive/orc_merge_diff_fs.q.out b/ql/src/test/results/clientpositive/orc_merge_diff_fs.q.out index d26cf05388..561bec7816 100644 --- a/ql/src/test/results/clientpositive/orc_merge_diff_fs.q.out +++ b/ql/src/test/results/clientpositive/orc_merge_diff_fs.q.out @@ -55,8 +55,9 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 @@ -69,11 +70,56 @@ STAGE PLANS: expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: int) - sort order: + - Map-reduce partition columns: _col2 (type: int) - value expressions: _col0 (type: int), _col1 (type: string) + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), CAST( _col2 AS STRING) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + value expressions: _col0 (type: int), _col1 (type: string) Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: int) @@ -101,7 +147,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1_n0 - Stage: Stage-2 + Stage: Stage-3 Stats Work Basic Stats Work: Column Stats Desc: @@ -143,13 +189,14 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 + Stage-2 depends on stages: Stage-1 + Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 Stage-5 - Stage-6 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-3 depends on stages: Stage-0 + Stage-4 + Stage-6 + Stage-7 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-1 @@ -162,11 +209,56 @@ STAGE PLANS: expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: int) - sort order: + - Map-reduce partition columns: _col2 (type: int) - value expressions: _col0 (type: int), _col1 (type: string) + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), CAST( _col2 AS STRING) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + value expressions: _col0 (type: int), _col1 (type: string) Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: int) @@ -181,10 +273,10 @@ STAGE PLANS: serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1b_n0 - Stage: Stage-7 + Stage: Stage-8 Conditional Operator - Stage: Stage-4 + Stage: Stage-5 Move Operator files: hdfs directory: true @@ -203,7 +295,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1b_n0 - Stage: Stage-2 + Stage: Stage-3 Stats Work Basic Stats Work: Column Stats Desc: @@ -211,7 +303,7 @@ STAGE PLANS: Column Types: int, string Table: default.orcfile_merge1b_n0 - Stage: Stage-3 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -223,7 +315,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1b_n0 - Stage: Stage-5 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -235,7 +327,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1b_n0 - Stage: Stage-6 + Stage: Stage-7 Move Operator files: hdfs directory: true @@ -275,13 +367,14 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 + Stage-2 depends on stages: Stage-1 + Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 Stage-5 - Stage-6 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-3 depends on stages: Stage-0 + Stage-4 + Stage-6 + Stage-7 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-1 @@ -294,11 +387,56 @@ STAGE PLANS: expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: int) - sort order: + - Map-reduce partition columns: _col2 (type: int) - value expressions: _col0 (type: int), _col1 (type: string) + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), CAST( _col2 AS STRING) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + value expressions: _col0 (type: int), _col1 (type: string) Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: int) @@ -313,10 +451,10 @@ STAGE PLANS: serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1c_n0 - Stage: Stage-7 + Stage: Stage-8 Conditional Operator - Stage: Stage-4 + Stage: Stage-5 Move Operator files: hdfs directory: true @@ -335,7 +473,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1c_n0 - Stage: Stage-2 + Stage: Stage-3 Stats Work Basic Stats Work: Column Stats Desc: @@ -343,21 +481,21 @@ STAGE PLANS: Column Types: int, string Table: default.orcfile_merge1c_n0 - Stage: Stage-3 + Stage: Stage-4 Merge File Operator Map Operator Tree: ORC File Merge Operator merge level: stripe input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - Stage: Stage-5 + Stage: Stage-6 Merge File Operator Map Operator Tree: ORC File Merge Operator merge level: stripe input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - Stage: Stage-6 + Stage: Stage-7 Move Operator files: hdfs directory: true diff --git a/ql/src/test/results/clientpositive/orc_merge_incompat2.q.out b/ql/src/test/results/clientpositive/orc_merge_incompat2.q.out index 1a4ff2f3fc..977ba4bc89 100644 --- a/ql/src/test/results/clientpositive/orc_merge_incompat2.q.out +++ b/ql/src/test/results/clientpositive/orc_merge_incompat2.q.out @@ -31,8 +31,9 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_merge5 STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 @@ -45,11 +46,56 @@ STAGE PLANS: expressions: userid (type: bigint), string1 (type: string), subtype (type: double), decimal1 (type: decimal(38,0)), ts (type: timestamp), subtype (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 22980 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col5 (type: double) - sort order: + - Map-reduce partition columns: _col5 (type: double) - value expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(38,0)), _col4 (type: timestamp) + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(38,0)), _col4 (type: timestamp), _col5 (type: double) + outputColumnNames: userid, string1, subtype, decimal1, ts, st + Statistics: Num rows: 1 Data size: 22980 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 'hll'), compute_stats(string1, 'hll'), compute_stats(subtype, 'hll'), compute_stats(decimal1, 'hll'), compute_stats(ts, 'hll') + keys: st (type: double) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 22980 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 1 Data size: 22980 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + keys: KEY._col0 (type: double) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 22980 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 22980 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 22980 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col5 (type: double) + sort order: + + Map-reduce partition columns: _col5 (type: double) + value expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(38,0)), _col4 (type: timestamp) Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: bigint), VALUE._col1 (type: string), VALUE._col2 (type: double), VALUE._col3 (type: decimal(38,0)), VALUE._col4 (type: timestamp), KEY._col5 (type: double) @@ -76,7 +122,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5a - Stage: Stage-2 + Stage: Stage-3 Stats Work Basic Stats Work: Column Stats Desc: diff --git a/ql/src/test/results/clientpositive/spark/dynpart_sort_optimization.q.out b/ql/src/test/results/clientpositive/spark/dynpart_sort_optimization.q.out index ddc5106bf5..7c40739235 100644 --- a/ql/src/test/results/clientpositive/spark/dynpart_sort_optimization.q.out +++ b/ql/src/test/results/clientpositive/spark/dynpart_sort_optimization.q.out @@ -3494,6 +3494,374 @@ STAGE PLANS: Stats Work Basic Stats Work: +PREHOOK: query: create table over1k_part4_0(i int) partitioned by (s string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@over1k_part4_0 +POSTHOOK: query: create table over1k_part4_0(i int) partitioned by (s string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@over1k_part4_0 +PREHOOK: query: create table over1k_part4_1(i int) partitioned by (s string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@over1k_part4_1 +POSTHOOK: query: create table over1k_part4_1(i int) partitioned by (s string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@over1k_part4_1 +PREHOOK: query: EXPLAIN +WITH CTE AS ( +select i, s from over1k_n3 where s like 'bob%' +) +FROM ( +select * from CTE where i > 1 ORDER BY s +) src1k +insert overwrite table over1k_part4_0 partition(s) +select i+1, s +insert overwrite table over1k_part4_1 partition(s) +select i+0, s +PREHOOK: type: QUERY +PREHOOK: Input: default@over1k_n3 +PREHOOK: Output: default@over1k_part4_0 +PREHOOK: Output: default@over1k_part4_1 +POSTHOOK: query: EXPLAIN +WITH CTE AS ( +select i, s from over1k_n3 where s like 'bob%' +) +FROM ( +select * from CTE where i > 1 ORDER BY s +) src1k +insert overwrite table over1k_part4_0 partition(s) +select i+1, s +insert overwrite table over1k_part4_1 partition(s) +select i+0, s +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over1k_n3 +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Spark + Edges: + Reducer 2 <- Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: over1k_n3 + filterExpr: ((s like 'bob%') and (i > 1)) (type: boolean) + Statistics: Num rows: 1049 Data size: 105949 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((i > 1) and (s like 'bob%')) (type: boolean) + Statistics: Num rows: 524 Data size: 52924 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: i (type: int), s (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 524 Data size: 52924 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: (_col0 + 1) (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 524 Data size: 52924 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + value expressions: _col0 (type: int) + Map 5 + Map Operator Tree: + TableScan + alias: over1k_n3 + filterExpr: ((s like 'bob%') and (i > 1)) (type: boolean) + Statistics: Num rows: 1049 Data size: 105949 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((i > 1) and (s like 'bob%')) (type: boolean) + Statistics: Num rows: 524 Data size: 52924 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: i (type: int), s (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 524 Data size: 52924 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: (_col0 + 0) (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 524 Data size: 52924 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + value expressions: _col0 (type: int) + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), KEY._col1 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 524 Data size: 52924 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.over1k_part4_0 + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), KEY._col1 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 524 Data size: 52924 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.over1k_part4_1 + + Stage: Stage-0 + Move Operator + tables: + partition: + s + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.over1k_part4_0 + + Stage: Stage-3 + Stats Work + Basic Stats Work: + + Stage: Stage-1 + Move Operator + tables: + partition: + s + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.over1k_part4_1 + + Stage: Stage-4 + Stats Work + Basic Stats Work: + +PREHOOK: query: WITH CTE AS ( +select i, s from over1k_n3 where s like 'bob%' +) +FROM ( +select * from CTE where i > 1 ORDER BY s +) src1k +insert overwrite table over1k_part4_0 partition(s) +select i+1, s +insert overwrite table over1k_part4_1 partition(s) +select i+0, s +PREHOOK: type: QUERY +PREHOOK: Input: default@over1k_n3 +PREHOOK: Output: default@over1k_part4_0 +PREHOOK: Output: default@over1k_part4_1 +POSTHOOK: query: WITH CTE AS ( +select i, s from over1k_n3 where s like 'bob%' +) +FROM ( +select * from CTE where i > 1 ORDER BY s +) src1k +insert overwrite table over1k_part4_0 partition(s) +select i+1, s +insert overwrite table over1k_part4_1 partition(s) +select i+0, s +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over1k_n3 +POSTHOOK: Output: default@over1k_part4_0@s=bob brown +POSTHOOK: Output: default@over1k_part4_0@s=bob carson +POSTHOOK: Output: default@over1k_part4_0@s=bob davidson +POSTHOOK: Output: default@over1k_part4_0@s=bob ellison +POSTHOOK: Output: default@over1k_part4_0@s=bob falkner +POSTHOOK: Output: default@over1k_part4_0@s=bob garcia +POSTHOOK: Output: default@over1k_part4_0@s=bob hernandez +POSTHOOK: Output: default@over1k_part4_0@s=bob ichabod +POSTHOOK: Output: default@over1k_part4_0@s=bob king +POSTHOOK: Output: default@over1k_part4_0@s=bob laertes +POSTHOOK: Output: default@over1k_part4_0@s=bob miller +POSTHOOK: Output: default@over1k_part4_0@s=bob ovid +POSTHOOK: Output: default@over1k_part4_0@s=bob polk +POSTHOOK: Output: default@over1k_part4_0@s=bob quirinius +POSTHOOK: Output: default@over1k_part4_0@s=bob steinbeck +POSTHOOK: Output: default@over1k_part4_0@s=bob van buren +POSTHOOK: Output: default@over1k_part4_0@s=bob white +POSTHOOK: Output: default@over1k_part4_0@s=bob xylophone +POSTHOOK: Output: default@over1k_part4_0@s=bob young +POSTHOOK: Output: default@over1k_part4_0@s=bob zipper +POSTHOOK: Output: default@over1k_part4_1@s=bob brown +POSTHOOK: Output: default@over1k_part4_1@s=bob carson +POSTHOOK: Output: default@over1k_part4_1@s=bob davidson +POSTHOOK: Output: default@over1k_part4_1@s=bob ellison +POSTHOOK: Output: default@over1k_part4_1@s=bob falkner +POSTHOOK: Output: default@over1k_part4_1@s=bob garcia +POSTHOOK: Output: default@over1k_part4_1@s=bob hernandez +POSTHOOK: Output: default@over1k_part4_1@s=bob ichabod +POSTHOOK: Output: default@over1k_part4_1@s=bob king +POSTHOOK: Output: default@over1k_part4_1@s=bob laertes +POSTHOOK: Output: default@over1k_part4_1@s=bob miller +POSTHOOK: Output: default@over1k_part4_1@s=bob ovid +POSTHOOK: Output: default@over1k_part4_1@s=bob polk +POSTHOOK: Output: default@over1k_part4_1@s=bob quirinius +POSTHOOK: Output: default@over1k_part4_1@s=bob steinbeck +POSTHOOK: Output: default@over1k_part4_1@s=bob van buren +POSTHOOK: Output: default@over1k_part4_1@s=bob white +POSTHOOK: Output: default@over1k_part4_1@s=bob xylophone +POSTHOOK: Output: default@over1k_part4_1@s=bob young +POSTHOOK: Output: default@over1k_part4_1@s=bob zipper +POSTHOOK: Lineage: over1k_part4_0 PARTITION(s=bob brown).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_0 PARTITION(s=bob carson).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_0 PARTITION(s=bob davidson).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_0 PARTITION(s=bob ellison).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_0 PARTITION(s=bob falkner).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_0 PARTITION(s=bob garcia).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_0 PARTITION(s=bob hernandez).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_0 PARTITION(s=bob ichabod).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_0 PARTITION(s=bob king).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_0 PARTITION(s=bob laertes).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_0 PARTITION(s=bob miller).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_0 PARTITION(s=bob ovid).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_0 PARTITION(s=bob polk).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_0 PARTITION(s=bob quirinius).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_0 PARTITION(s=bob steinbeck).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_0 PARTITION(s=bob van buren).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_0 PARTITION(s=bob white).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_0 PARTITION(s=bob xylophone).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_0 PARTITION(s=bob young).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_0 PARTITION(s=bob zipper).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_1 PARTITION(s=bob brown).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_1 PARTITION(s=bob carson).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_1 PARTITION(s=bob davidson).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_1 PARTITION(s=bob ellison).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_1 PARTITION(s=bob falkner).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_1 PARTITION(s=bob garcia).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_1 PARTITION(s=bob hernandez).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_1 PARTITION(s=bob ichabod).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_1 PARTITION(s=bob king).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_1 PARTITION(s=bob laertes).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_1 PARTITION(s=bob miller).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_1 PARTITION(s=bob ovid).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_1 PARTITION(s=bob polk).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_1 PARTITION(s=bob quirinius).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_1 PARTITION(s=bob steinbeck).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_1 PARTITION(s=bob van buren).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_1 PARTITION(s=bob white).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_1 PARTITION(s=bob xylophone).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_1 PARTITION(s=bob young).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: over1k_part4_1 PARTITION(s=bob zipper).i EXPRESSION [(over1k_n3)over1k_n3.FieldSchema(name:i, type:int, comment:null), ] +PREHOOK: query: select count(1) from over1k_part4_0 +PREHOOK: type: QUERY +PREHOOK: Input: default@over1k_part4_0 +PREHOOK: Input: default@over1k_part4_0@s=bob brown +PREHOOK: Input: default@over1k_part4_0@s=bob carson +PREHOOK: Input: default@over1k_part4_0@s=bob davidson +PREHOOK: Input: default@over1k_part4_0@s=bob ellison +PREHOOK: Input: default@over1k_part4_0@s=bob falkner +PREHOOK: Input: default@over1k_part4_0@s=bob garcia +PREHOOK: Input: default@over1k_part4_0@s=bob hernandez +PREHOOK: Input: default@over1k_part4_0@s=bob ichabod +PREHOOK: Input: default@over1k_part4_0@s=bob king +PREHOOK: Input: default@over1k_part4_0@s=bob laertes +PREHOOK: Input: default@over1k_part4_0@s=bob miller +PREHOOK: Input: default@over1k_part4_0@s=bob ovid +PREHOOK: Input: default@over1k_part4_0@s=bob polk +PREHOOK: Input: default@over1k_part4_0@s=bob quirinius +PREHOOK: Input: default@over1k_part4_0@s=bob steinbeck +PREHOOK: Input: default@over1k_part4_0@s=bob van buren +PREHOOK: Input: default@over1k_part4_0@s=bob white +PREHOOK: Input: default@over1k_part4_0@s=bob xylophone +PREHOOK: Input: default@over1k_part4_0@s=bob young +PREHOOK: Input: default@over1k_part4_0@s=bob zipper +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from over1k_part4_0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over1k_part4_0 +POSTHOOK: Input: default@over1k_part4_0@s=bob brown +POSTHOOK: Input: default@over1k_part4_0@s=bob carson +POSTHOOK: Input: default@over1k_part4_0@s=bob davidson +POSTHOOK: Input: default@over1k_part4_0@s=bob ellison +POSTHOOK: Input: default@over1k_part4_0@s=bob falkner +POSTHOOK: Input: default@over1k_part4_0@s=bob garcia +POSTHOOK: Input: default@over1k_part4_0@s=bob hernandez +POSTHOOK: Input: default@over1k_part4_0@s=bob ichabod +POSTHOOK: Input: default@over1k_part4_0@s=bob king +POSTHOOK: Input: default@over1k_part4_0@s=bob laertes +POSTHOOK: Input: default@over1k_part4_0@s=bob miller +POSTHOOK: Input: default@over1k_part4_0@s=bob ovid +POSTHOOK: Input: default@over1k_part4_0@s=bob polk +POSTHOOK: Input: default@over1k_part4_0@s=bob quirinius +POSTHOOK: Input: default@over1k_part4_0@s=bob steinbeck +POSTHOOK: Input: default@over1k_part4_0@s=bob van buren +POSTHOOK: Input: default@over1k_part4_0@s=bob white +POSTHOOK: Input: default@over1k_part4_0@s=bob xylophone +POSTHOOK: Input: default@over1k_part4_0@s=bob young +POSTHOOK: Input: default@over1k_part4_0@s=bob zipper +#### A masked pattern was here #### +41 +PREHOOK: query: select count(1) from over1k_part4_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@over1k_part4_1 +PREHOOK: Input: default@over1k_part4_1@s=bob brown +PREHOOK: Input: default@over1k_part4_1@s=bob carson +PREHOOK: Input: default@over1k_part4_1@s=bob davidson +PREHOOK: Input: default@over1k_part4_1@s=bob ellison +PREHOOK: Input: default@over1k_part4_1@s=bob falkner +PREHOOK: Input: default@over1k_part4_1@s=bob garcia +PREHOOK: Input: default@over1k_part4_1@s=bob hernandez +PREHOOK: Input: default@over1k_part4_1@s=bob ichabod +PREHOOK: Input: default@over1k_part4_1@s=bob king +PREHOOK: Input: default@over1k_part4_1@s=bob laertes +PREHOOK: Input: default@over1k_part4_1@s=bob miller +PREHOOK: Input: default@over1k_part4_1@s=bob ovid +PREHOOK: Input: default@over1k_part4_1@s=bob polk +PREHOOK: Input: default@over1k_part4_1@s=bob quirinius +PREHOOK: Input: default@over1k_part4_1@s=bob steinbeck +PREHOOK: Input: default@over1k_part4_1@s=bob van buren +PREHOOK: Input: default@over1k_part4_1@s=bob white +PREHOOK: Input: default@over1k_part4_1@s=bob xylophone +PREHOOK: Input: default@over1k_part4_1@s=bob young +PREHOOK: Input: default@over1k_part4_1@s=bob zipper +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from over1k_part4_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over1k_part4_1 +POSTHOOK: Input: default@over1k_part4_1@s=bob brown +POSTHOOK: Input: default@over1k_part4_1@s=bob carson +POSTHOOK: Input: default@over1k_part4_1@s=bob davidson +POSTHOOK: Input: default@over1k_part4_1@s=bob ellison +POSTHOOK: Input: default@over1k_part4_1@s=bob falkner +POSTHOOK: Input: default@over1k_part4_1@s=bob garcia +POSTHOOK: Input: default@over1k_part4_1@s=bob hernandez +POSTHOOK: Input: default@over1k_part4_1@s=bob ichabod +POSTHOOK: Input: default@over1k_part4_1@s=bob king +POSTHOOK: Input: default@over1k_part4_1@s=bob laertes +POSTHOOK: Input: default@over1k_part4_1@s=bob miller +POSTHOOK: Input: default@over1k_part4_1@s=bob ovid +POSTHOOK: Input: default@over1k_part4_1@s=bob polk +POSTHOOK: Input: default@over1k_part4_1@s=bob quirinius +POSTHOOK: Input: default@over1k_part4_1@s=bob steinbeck +POSTHOOK: Input: default@over1k_part4_1@s=bob van buren +POSTHOOK: Input: default@over1k_part4_1@s=bob white +POSTHOOK: Input: default@over1k_part4_1@s=bob xylophone +POSTHOOK: Input: default@over1k_part4_1@s=bob young +POSTHOOK: Input: default@over1k_part4_1@s=bob zipper +#### A masked pattern was here #### +41 PREHOOK: query: drop table over1k_n3 PREHOOK: type: DROPTABLE PREHOOK: Input: default@over1k_n3 diff --git a/ql/src/test/results/clientpositive/stats4.q.out b/ql/src/test/results/clientpositive/stats4.q.out index b1edea1535..e562087b3b 100644 --- a/ql/src/test/results/clientpositive/stats4.q.out +++ b/ql/src/test/results/clientpositive/stats4.q.out @@ -56,11 +56,13 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0, Stage-5 + Stage-5 depends on stages: Stage-2 + Stage-7 depends on stages: Stage-1, Stage-5 + Stage-6 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-2 @@ -76,11 +78,28 @@ STAGE PLANS: expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col3 (type: string) - sort order: ++ - Map-reduce partition columns: _col2 (type: string), _col3 (type: string) - value expressions: _col0 (type: string), _col1 (type: string) + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (ds > '2008-04-08') (type: boolean) Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE @@ -88,12 +107,56 @@ STAGE PLANS: expressions: key (type: string), value (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, hr + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: '2008-12-31' (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe File Output Operator compressed: false table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string), _col3 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string), _col3 (type: string) + value expressions: _col0 (type: string), _col1 (type: string) Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -122,7 +185,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part1 - Stage: Stage-3 + Stage: Stage-4 Stats Work Basic Stats Work: Column Stats Desc: @@ -130,7 +193,45 @@ STAGE PLANS: Column Types: string, string Table: default.nzhang_part1 - Stage: Stage-4 + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: '2008-12-31' (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: '2008-12-31' (type: string), _col1 (type: string) + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: '2008-12-31' (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), '2008-12-31' (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-7 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part2 + + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -167,14 +268,6 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part2 - Stage: Stage-5 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key, value - Column Types: string, string - Table: default.nzhang_part2 - PREHOOK: query: from srcpart insert overwrite table nzhang_part1 partition (ds, hr) select key, value, ds, hr where ds <= '2008-04-08' insert overwrite table nzhang_part2 partition(ds='2008-12-31', hr) select key, value, hr where ds > '2008-04-08' diff --git a/ql/src/test/results/clientpositive/stats_empty_dyn_part.q.out b/ql/src/test/results/clientpositive/stats_empty_dyn_part.q.out index 431ff3ff71..269684d5e0 100644 --- a/ql/src/test/results/clientpositive/stats_empty_dyn_part.q.out +++ b/ql/src/test/results/clientpositive/stats_empty_dyn_part.q.out @@ -15,8 +15,9 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 @@ -33,11 +34,56 @@ STAGE PLANS: expressions: 'no_such_value' (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - value expressions: _col0 (type: string) + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, part + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll') + keys: part (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + value expressions: _col0 (type: string) Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -65,7 +111,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable_n7 - Stage: Stage-2 + Stage: Stage-3 Stats Work Basic Stats Work: Column Stats Desc: