diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java index cbb59ae..e3313e9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java @@ -395,7 +395,8 @@ public ReduceSinkDesc getReduceSinkDesc(List partitionPositions, // should honor the ordering of records provided by ORDER BY in SELECT statement ReduceSinkOperator parentRSOp = OperatorUtils.findSingleOperatorUpstream(parent, ReduceSinkOperator.class); - if (parentRSOp != null) { + boolean isOrderBy = parseCtx.getQB().getParseInfo().getDestToOrderBy().size() > 0; + if (parentRSOp != null && isOrderBy) { String parentRSOpOrder = parentRSOp.getConf().getOrder(); if (parentRSOpOrder != null && !parentRSOpOrder.isEmpty() && sortPositions.isEmpty()) { newKeyCols.addAll(parentRSOp.getConf().getKeyCols()); diff --git ql/src/test/queries/clientpositive/dynpart_sort_opt_vectorization.q ql/src/test/queries/clientpositive/dynpart_sort_opt_vectorization.q index ce5a5b7..78816ae 100644 --- ql/src/test/queries/clientpositive/dynpart_sort_opt_vectorization.q +++ ql/src/test/queries/clientpositive/dynpart_sort_opt_vectorization.q @@ -108,6 +108,13 @@ set hive.optimize.sort.dynamic.partition=false; explain insert overwrite table over1k_part2_orc partition(ds="foo",t) select si,i,b,f,t from over1k_orc where t is null or t=27 order by i; set hive.optimize.sort.dynamic.partition=true; explain insert overwrite table over1k_part2_orc partition(ds="foo",t) select si,i,b,f,t from over1k_orc where t is null or t=27 order by i; +explain insert overwrite table over1k_part2_orc partition(ds="foo",t) select si,i,b,f,t from (select * from over1k_orc order by i limit 10) tmp where t is null or t=27; + +set hive.optimize.sort.dynamic.partition=false; +explain insert overwrite table over1k_part2_orc partition(ds="foo",t) select si,i,b,f,t from over1k_orc where t is null or t=27 group by si,i,b,f,t; +set hive.optimize.sort.dynamic.partition=true; +-- tests for HIVE-8162, only partition column 't' should be in last RS operator +explain insert overwrite table over1k_part2_orc partition(ds="foo",t) select si,i,b,f,t from over1k_orc where t is null or t=27 group by si,i,b,f,t; set hive.optimize.sort.dynamic.partition=false; insert overwrite table over1k_part2_orc partition(ds="foo",t) select si,i,b,f,t from over1k_orc where t is null or t=27 order by i; diff --git ql/src/test/queries/clientpositive/dynpart_sort_optimization.q ql/src/test/queries/clientpositive/dynpart_sort_optimization.q index 8c3c68f..e459583 100644 --- ql/src/test/queries/clientpositive/dynpart_sort_optimization.q +++ ql/src/test/queries/clientpositive/dynpart_sort_optimization.q @@ -102,6 +102,13 @@ set hive.optimize.sort.dynamic.partition=false; explain insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from over1k where t is null or t=27 order by i; set hive.optimize.sort.dynamic.partition=true; explain insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from over1k where t is null or t=27 order by i; +explain insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from (select * from over1k order by i limit 10) tmp where t is null or t=27; + +set hive.optimize.sort.dynamic.partition=false; +explain insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from over1k where t is null or t=27 group by si,i,b,f,t; +set hive.optimize.sort.dynamic.partition=true; +-- tests for HIVE-8162, only partition column 't' should be in last RS operator +explain insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from over1k where t is null or t=27 group by si,i,b,f,t; set hive.optimize.sort.dynamic.partition=false; insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from over1k where t is null or t=27 order by i; diff --git ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out index 04cf000..09a57ef 100644 --- ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out +++ ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out @@ -1408,6 +1408,254 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator +PREHOOK: query: explain insert overwrite table over1k_part2_orc partition(ds="foo",t) select si,i,b,f,t from (select * from over1k_orc order by i limit 10) tmp where t is null or t=27 +PREHOOK: type: QUERY +POSTHOOK: query: explain insert overwrite table over1k_part2_orc partition(ds="foo",t) select si,i,b,f,t from (select * from over1k_orc order by i limit 10) tmp where t is null or t=27 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over1k_orc + Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), f (type: float) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), KEY.reducesinkkey0 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 is null or (_col0 = 27)) (type: boolean) + Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col0 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col4 (type: tinyint) + sort order: + + Map-reduce partition columns: _col4 (type: tinyint) + Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + Reduce Operator Tree: + Extract + Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.over1k_part2_orc + + Stage: Stage-0 + Move Operator + tables: + partition: + ds foo + t + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.over1k_part2_orc + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: explain insert overwrite table over1k_part2_orc partition(ds="foo",t) select si,i,b,f,t from over1k_orc where t is null or t=27 group by si,i,b,f,t +PREHOOK: type: QUERY +POSTHOOK: query: explain insert overwrite table over1k_part2_orc partition(ds="foo",t) select si,i,b,f,t from over1k_orc where t is null or t=27 group by si,i,b,f,t +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over1k_orc + Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (t is null or (t = 27)) (type: boolean) + Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) + outputColumnNames: si, i, b, f, t + Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + sort order: +++++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: smallint), KEY._col1 (type: int), KEY._col2 (type: bigint), KEY._col3 (type: float), KEY._col4 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.over1k_part2_orc + + Stage: Stage-0 + Move Operator + tables: + partition: + ds foo + t + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.over1k_part2_orc + + Stage: Stage-2 + Stats-Aggr Operator + +PREHOOK: query: -- tests for HIVE-8162, only partition column 't' should be in last RS operator +explain insert overwrite table over1k_part2_orc partition(ds="foo",t) select si,i,b,f,t from over1k_orc where t is null or t=27 group by si,i,b,f,t +PREHOOK: type: QUERY +POSTHOOK: query: -- tests for HIVE-8162, only partition column 't' should be in last RS operator +explain insert overwrite table over1k_part2_orc partition(ds="foo",t) select si,i,b,f,t from over1k_orc where t is null or t=27 group by si,i,b,f,t +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over1k_orc + Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (t is null or (t = 27)) (type: boolean) + Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) + outputColumnNames: si, i, b, f, t + Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + sort order: +++++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: smallint), KEY._col1 (type: int), KEY._col2 (type: bigint), KEY._col3 (type: float), KEY._col4 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col4 (type: tinyint) + sort order: + + Map-reduce partition columns: _col4 (type: tinyint) + Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + Reduce Operator Tree: + Extract + Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.over1k_part2_orc + + Stage: Stage-0 + Move Operator + tables: + partition: + ds foo + t + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.over1k_part2_orc + + Stage: Stage-3 + Stats-Aggr Operator + PREHOOK: query: insert overwrite table over1k_part2_orc partition(ds="foo",t) select si,i,b,f,t from over1k_orc where t is null or t=27 order by i PREHOOK: type: QUERY PREHOOK: Input: default@over1k_orc diff --git ql/src/test/results/clientpositive/dynpart_sort_optimization.q.out ql/src/test/results/clientpositive/dynpart_sort_optimization.q.out index 941aa9e..549169f 100644 --- ql/src/test/results/clientpositive/dynpart_sort_optimization.q.out +++ ql/src/test/results/clientpositive/dynpart_sort_optimization.q.out @@ -1311,6 +1311,251 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator +PREHOOK: query: explain insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from (select * from over1k order by i limit 10) tmp where t is null or t=27 +PREHOOK: type: QUERY +POSTHOOK: query: explain insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from (select * from over1k order by i limit 10) tmp where t is null or t=27 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over1k + Statistics: Num rows: 4443 Data size: 106636 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), f (type: float) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 4443 Data size: 106636 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Statistics: Num rows: 4443 Data size: 106636 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), KEY.reducesinkkey0 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 4443 Data size: 106636 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 is null or (_col0 = 27)) (type: boolean) + Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col0 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col4 (type: tinyint) + sort order: + + Map-reduce partition columns: _col4 (type: tinyint) + Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + Reduce Operator Tree: + Extract + Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.over1k_part2 + + Stage: Stage-0 + Move Operator + tables: + partition: + ds foo + t + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.over1k_part2 + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: explain insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from over1k where t is null or t=27 group by si,i,b,f,t +PREHOOK: type: QUERY +POSTHOOK: query: explain insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from over1k where t is null or t=27 group by si,i,b,f,t +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over1k + Statistics: Num rows: 4443 Data size: 106636 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (t is null or (t = 27)) (type: boolean) + Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) + outputColumnNames: si, i, b, f, t + Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + sort order: +++++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: smallint), KEY._col1 (type: int), KEY._col2 (type: bigint), KEY._col3 (type: float), KEY._col4 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.over1k_part2 + + Stage: Stage-0 + Move Operator + tables: + partition: + ds foo + t + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.over1k_part2 + + Stage: Stage-2 + Stats-Aggr Operator + +PREHOOK: query: -- tests for HIVE-8162, only partition column 't' should be in last RS operator +explain insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from over1k where t is null or t=27 group by si,i,b,f,t +PREHOOK: type: QUERY +POSTHOOK: query: -- tests for HIVE-8162, only partition column 't' should be in last RS operator +explain insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from over1k where t is null or t=27 group by si,i,b,f,t +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over1k + Statistics: Num rows: 4443 Data size: 106636 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (t is null or (t = 27)) (type: boolean) + Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) + outputColumnNames: si, i, b, f, t + Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + sort order: +++++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: smallint), KEY._col1 (type: int), KEY._col2 (type: bigint), KEY._col3 (type: float), KEY._col4 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col4 (type: tinyint) + sort order: + + Map-reduce partition columns: _col4 (type: tinyint) + Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + Reduce Operator Tree: + Extract + Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.over1k_part2 + + Stage: Stage-0 + Move Operator + tables: + partition: + ds foo + t + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.over1k_part2 + + Stage: Stage-3 + Stats-Aggr Operator + PREHOOK: query: insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from over1k where t is null or t=27 order by i PREHOOK: type: QUERY PREHOOK: Input: default@over1k diff --git ql/src/test/results/clientpositive/tez/dynpart_sort_opt_vectorization.q.out ql/src/test/results/clientpositive/tez/dynpart_sort_opt_vectorization.q.out index 53dfb2c..b43f0c9 100644 --- ql/src/test/results/clientpositive/tez/dynpart_sort_opt_vectorization.q.out +++ ql/src/test/results/clientpositive/tez/dynpart_sort_opt_vectorization.q.out @@ -1463,6 +1463,266 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator +PREHOOK: query: explain insert overwrite table over1k_part2_orc partition(ds="foo",t) select si,i,b,f,t from (select * from over1k_orc order by i limit 10) tmp where t is null or t=27 +PREHOOK: type: QUERY +POSTHOOK: query: explain insert overwrite table over1k_part2_orc partition(ds="foo",t) select si,i,b,f,t from (select * from over1k_orc order by i limit 10) tmp where t is null or t=27 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over1k_orc + Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), f (type: float) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), KEY.reducesinkkey0 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 is null or (_col0 = 27)) (type: boolean) + Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col0 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col4 (type: tinyint) + sort order: + + Map-reduce partition columns: _col4 (type: tinyint) + Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + Execution mode: vectorized + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.over1k_part2_orc + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: + ds foo + t + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.over1k_part2_orc + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: explain insert overwrite table over1k_part2_orc partition(ds="foo",t) select si,i,b,f,t from over1k_orc where t is null or t=27 group by si,i,b,f,t +PREHOOK: type: QUERY +POSTHOOK: query: explain insert overwrite table over1k_part2_orc partition(ds="foo",t) select si,i,b,f,t from over1k_orc where t is null or t=27 group by si,i,b,f,t +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over1k_orc + Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (t is null or (t = 27)) (type: boolean) + Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) + outputColumnNames: si, i, b, f, t + Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + sort order: +++++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: smallint), KEY._col1 (type: int), KEY._col2 (type: bigint), KEY._col3 (type: float), KEY._col4 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.over1k_part2_orc + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: + ds foo + t + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.over1k_part2_orc + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: -- tests for HIVE-8162, only partition column 't' should be in last RS operator +explain insert overwrite table over1k_part2_orc partition(ds="foo",t) select si,i,b,f,t from over1k_orc where t is null or t=27 group by si,i,b,f,t +PREHOOK: type: QUERY +POSTHOOK: query: -- tests for HIVE-8162, only partition column 't' should be in last RS operator +explain insert overwrite table over1k_part2_orc partition(ds="foo",t) select si,i,b,f,t from over1k_orc where t is null or t=27 group by si,i,b,f,t +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over1k_orc + Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (t is null or (t = 27)) (type: boolean) + Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) + outputColumnNames: si, i, b, f, t + Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + sort order: +++++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: smallint), KEY._col1 (type: int), KEY._col2 (type: bigint), KEY._col3 (type: float), KEY._col4 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col4 (type: tinyint) + sort order: + + Map-reduce partition columns: _col4 (type: tinyint) + Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + Execution mode: vectorized + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.over1k_part2_orc + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: + ds foo + t + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.over1k_part2_orc + + Stage: Stage-3 + Stats-Aggr Operator + PREHOOK: query: insert overwrite table over1k_part2_orc partition(ds="foo",t) select si,i,b,f,t from over1k_orc where t is null or t=27 order by i PREHOOK: type: QUERY PREHOOK: Input: default@over1k_orc diff --git ql/src/test/results/clientpositive/tez/dynpart_sort_optimization.q.out ql/src/test/results/clientpositive/tez/dynpart_sort_optimization.q.out index e2f8673..5ff3e49 100644 --- ql/src/test/results/clientpositive/tez/dynpart_sort_optimization.q.out +++ ql/src/test/results/clientpositive/tez/dynpart_sort_optimization.q.out @@ -1381,6 +1381,261 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator +PREHOOK: query: explain insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from (select * from over1k order by i limit 10) tmp where t is null or t=27 +PREHOOK: type: QUERY +POSTHOOK: query: explain insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from (select * from over1k order by i limit 10) tmp where t is null or t=27 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over1k + Statistics: Num rows: 4443 Data size: 106636 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), f (type: float) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 4443 Data size: 106636 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Statistics: Num rows: 4443 Data size: 106636 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float) + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), KEY.reducesinkkey0 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 4443 Data size: 106636 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 is null or (_col0 = 27)) (type: boolean) + Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col0 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col4 (type: tinyint) + sort order: + + Map-reduce partition columns: _col4 (type: tinyint) + Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.over1k_part2 + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: + ds foo + t + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.over1k_part2 + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: explain insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from over1k where t is null or t=27 group by si,i,b,f,t +PREHOOK: type: QUERY +POSTHOOK: query: explain insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from over1k where t is null or t=27 group by si,i,b,f,t +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over1k + Statistics: Num rows: 4443 Data size: 106636 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (t is null or (t = 27)) (type: boolean) + Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) + outputColumnNames: si, i, b, f, t + Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + sort order: +++++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: smallint), KEY._col1 (type: int), KEY._col2 (type: bigint), KEY._col3 (type: float), KEY._col4 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.over1k_part2 + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: + ds foo + t + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.over1k_part2 + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: -- tests for HIVE-8162, only partition column 't' should be in last RS operator +explain insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from over1k where t is null or t=27 group by si,i,b,f,t +PREHOOK: type: QUERY +POSTHOOK: query: -- tests for HIVE-8162, only partition column 't' should be in last RS operator +explain insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from over1k where t is null or t=27 group by si,i,b,f,t +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over1k + Statistics: Num rows: 4443 Data size: 106636 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (t is null or (t = 27)) (type: boolean) + Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) + outputColumnNames: si, i, b, f, t + Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + sort order: +++++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: smallint), KEY._col1 (type: int), KEY._col2 (type: bigint), KEY._col3 (type: float), KEY._col4 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col4 (type: tinyint) + sort order: + + Map-reduce partition columns: _col4 (type: tinyint) + Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.over1k_part2 + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: + ds foo + t + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.over1k_part2 + + Stage: Stage-3 + Stats-Aggr Operator + PREHOOK: query: insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from over1k where t is null or t=27 order by i PREHOOK: type: QUERY PREHOOK: Input: default@over1k