diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java index cbb59ae..4d63209 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java @@ -395,7 +395,8 @@ public ReduceSinkDesc getReduceSinkDesc(List partitionPositions, // should honor the ordering of records provided by ORDER BY in SELECT statement ReduceSinkOperator parentRSOp = OperatorUtils.findSingleOperatorUpstream(parent, ReduceSinkOperator.class); - if (parentRSOp != null) { + boolean isOrderBy = parseCtx.getQueryProperties().hasOrderBy(); + if (parentRSOp != null && isOrderBy) { String parentRSOpOrder = parentRSOp.getConf().getOrder(); if (parentRSOpOrder != null && !parentRSOpOrder.isEmpty() && sortPositions.isEmpty()) { newKeyCols.addAll(parentRSOp.getConf().getKeyCols()); diff --git ql/src/test/queries/clientpositive/dynpart_sort_opt_vectorization.q ql/src/test/queries/clientpositive/dynpart_sort_opt_vectorization.q index ce5a5b7..c41cd0c 100644 --- ql/src/test/queries/clientpositive/dynpart_sort_opt_vectorization.q +++ ql/src/test/queries/clientpositive/dynpart_sort_opt_vectorization.q @@ -110,6 +110,12 @@ set hive.optimize.sort.dynamic.partition=true; explain insert overwrite table over1k_part2_orc partition(ds="foo",t) select si,i,b,f,t from over1k_orc where t is null or t=27 order by i; set hive.optimize.sort.dynamic.partition=false; +explain insert overwrite table over1k_part2_orc partition(ds="foo",t) select si,i,b,f,t from over1k_orc where t is null or t=27 group by si,i,b,f,t; +set hive.optimize.sort.dynamic.partition=true; +-- tests for HIVE-8162, only partition column 't' should be in last RS operator +explain insert overwrite table over1k_part2_orc partition(ds="foo",t) select si,i,b,f,t from over1k_orc where t is null or t=27 group by si,i,b,f,t; + +set hive.optimize.sort.dynamic.partition=false; insert overwrite table over1k_part2_orc partition(ds="foo",t) select si,i,b,f,t from over1k_orc where t is null or t=27 order by i; desc formatted over1k_part2_orc partition(ds="foo",t=27); diff --git ql/src/test/queries/clientpositive/dynpart_sort_optimization.q ql/src/test/queries/clientpositive/dynpart_sort_optimization.q index 8c3c68f..76ea4ac 100644 --- ql/src/test/queries/clientpositive/dynpart_sort_optimization.q +++ ql/src/test/queries/clientpositive/dynpart_sort_optimization.q @@ -104,6 +104,12 @@ set hive.optimize.sort.dynamic.partition=true; explain insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from over1k where t is null or t=27 order by i; set hive.optimize.sort.dynamic.partition=false; +explain insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from over1k where t is null or t=27 group by si,i,b,f,t; +set hive.optimize.sort.dynamic.partition=true; +-- tests for HIVE-8162, only partition column 't' should be in last RS operator +explain insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from over1k where t is null or t=27 group by si,i,b,f,t; + +set hive.optimize.sort.dynamic.partition=false; insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from over1k where t is null or t=27 order by i; desc formatted over1k_part2 partition(ds="foo",t=27); diff --git ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out index e0b9bd1..c631561 100644 --- ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out +++ ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out @@ -1408,6 +1408,167 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator +PREHOOK: query: explain insert overwrite table over1k_part2_orc partition(ds="foo",t) select si,i,b,f,t from over1k_orc where t is null or t=27 group by si,i,b,f,t +PREHOOK: type: QUERY +POSTHOOK: query: explain insert overwrite table over1k_part2_orc partition(ds="foo",t) select si,i,b,f,t from over1k_orc where t is null or t=27 group by si,i,b,f,t +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over1k_orc + Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (t is null or (t = 27)) (type: boolean) + Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) + outputColumnNames: si, i, b, f, t + Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + sort order: +++++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: smallint), KEY._col1 (type: int), KEY._col2 (type: bigint), KEY._col3 (type: float), KEY._col4 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.over1k_part2_orc + + Stage: Stage-0 + Move Operator + tables: + partition: + ds foo + t + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.over1k_part2_orc + + Stage: Stage-2 + Stats-Aggr Operator + +PREHOOK: query: -- tests for HIVE-8162, only partition column 't' should be in last RS operator +explain insert overwrite table over1k_part2_orc partition(ds="foo",t) select si,i,b,f,t from over1k_orc where t is null or t=27 group by si,i,b,f,t +PREHOOK: type: QUERY +POSTHOOK: query: -- tests for HIVE-8162, only partition column 't' should be in last RS operator +explain insert overwrite table over1k_part2_orc partition(ds="foo",t) select si,i,b,f,t from over1k_orc where t is null or t=27 group by si,i,b,f,t +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over1k_orc + Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (t is null or (t = 27)) (type: boolean) + Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) + outputColumnNames: si, i, b, f, t + Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + sort order: +++++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: smallint), KEY._col1 (type: int), KEY._col2 (type: bigint), KEY._col3 (type: float), KEY._col4 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col4 (type: tinyint) + sort order: + + Map-reduce partition columns: _col4 (type: tinyint) + Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + Reduce Operator Tree: + Extract + Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.over1k_part2_orc + + Stage: Stage-0 + Move Operator + tables: + partition: + ds foo + t + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.over1k_part2_orc + + Stage: Stage-3 + Stats-Aggr Operator + PREHOOK: query: insert overwrite table over1k_part2_orc partition(ds="foo",t) select si,i,b,f,t from over1k_orc where t is null or t=27 order by i PREHOOK: type: QUERY PREHOOK: Input: default@over1k_orc diff --git ql/src/test/results/clientpositive/dynpart_sort_optimization.q.out ql/src/test/results/clientpositive/dynpart_sort_optimization.q.out index 941aa9e..b38d308 100644 --- ql/src/test/results/clientpositive/dynpart_sort_optimization.q.out +++ ql/src/test/results/clientpositive/dynpart_sort_optimization.q.out @@ -1311,6 +1311,165 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator +PREHOOK: query: explain insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from over1k where t is null or t=27 group by si,i,b,f,t +PREHOOK: type: QUERY +POSTHOOK: query: explain insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from over1k where t is null or t=27 group by si,i,b,f,t +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over1k + Statistics: Num rows: 4443 Data size: 106636 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (t is null or (t = 27)) (type: boolean) + Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) + outputColumnNames: si, i, b, f, t + Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + sort order: +++++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: smallint), KEY._col1 (type: int), KEY._col2 (type: bigint), KEY._col3 (type: float), KEY._col4 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.over1k_part2 + + Stage: Stage-0 + Move Operator + tables: + partition: + ds foo + t + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.over1k_part2 + + Stage: Stage-2 + Stats-Aggr Operator + +PREHOOK: query: -- tests for HIVE-8162, only partition column 't' should be in last RS operator +explain insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from over1k where t is null or t=27 group by si,i,b,f,t +PREHOOK: type: QUERY +POSTHOOK: query: -- tests for HIVE-8162, only partition column 't' should be in last RS operator +explain insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from over1k where t is null or t=27 group by si,i,b,f,t +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over1k + Statistics: Num rows: 4443 Data size: 106636 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (t is null or (t = 27)) (type: boolean) + Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) + outputColumnNames: si, i, b, f, t + Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + sort order: +++++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: smallint), KEY._col1 (type: int), KEY._col2 (type: bigint), KEY._col3 (type: float), KEY._col4 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col4 (type: tinyint) + sort order: + + Map-reduce partition columns: _col4 (type: tinyint) + Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + Reduce Operator Tree: + Extract + Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.over1k_part2 + + Stage: Stage-0 + Move Operator + tables: + partition: + ds foo + t + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.over1k_part2 + + Stage: Stage-3 + Stats-Aggr Operator + PREHOOK: query: insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from over1k where t is null or t=27 order by i PREHOOK: type: QUERY PREHOOK: Input: default@over1k diff --git ql/src/test/results/clientpositive/tez/dynpart_sort_opt_vectorization.q.out ql/src/test/results/clientpositive/tez/dynpart_sort_opt_vectorization.q.out index f0bf08d..c4bf5d7 100644 --- ql/src/test/results/clientpositive/tez/dynpart_sort_opt_vectorization.q.out +++ ql/src/test/results/clientpositive/tez/dynpart_sort_opt_vectorization.q.out @@ -1463,6 +1463,178 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator +PREHOOK: query: explain insert overwrite table over1k_part2_orc partition(ds="foo",t) select si,i,b,f,t from over1k_orc where t is null or t=27 group by si,i,b,f,t +PREHOOK: type: QUERY +POSTHOOK: query: explain insert overwrite table over1k_part2_orc partition(ds="foo",t) select si,i,b,f,t from over1k_orc where t is null or t=27 group by si,i,b,f,t +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over1k_orc + Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (t is null or (t = 27)) (type: boolean) + Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) + outputColumnNames: si, i, b, f, t + Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + sort order: +++++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: smallint), KEY._col1 (type: int), KEY._col2 (type: bigint), KEY._col3 (type: float), KEY._col4 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.over1k_part2_orc + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: + ds foo + t + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.over1k_part2_orc + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: -- tests for HIVE-8162, only partition column 't' should be in last RS operator +explain insert overwrite table over1k_part2_orc partition(ds="foo",t) select si,i,b,f,t from over1k_orc where t is null or t=27 group by si,i,b,f,t +PREHOOK: type: QUERY +POSTHOOK: query: -- tests for HIVE-8162, only partition column 't' should be in last RS operator +explain insert overwrite table over1k_part2_orc partition(ds="foo",t) select si,i,b,f,t from over1k_orc where t is null or t=27 group by si,i,b,f,t +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over1k_orc + Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (t is null or (t = 27)) (type: boolean) + Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) + outputColumnNames: si, i, b, f, t + Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + sort order: +++++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: smallint), KEY._col1 (type: int), KEY._col2 (type: bigint), KEY._col3 (type: float), KEY._col4 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col4 (type: tinyint) + sort order: + + Map-reduce partition columns: _col4 (type: tinyint) + Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + Execution mode: vectorized + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.over1k_part2_orc + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: + ds foo + t + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.over1k_part2_orc + + Stage: Stage-3 + Stats-Aggr Operator + PREHOOK: query: insert overwrite table over1k_part2_orc partition(ds="foo",t) select si,i,b,f,t from over1k_orc where t is null or t=27 order by i PREHOOK: type: QUERY PREHOOK: Input: default@over1k_orc diff --git ql/src/test/results/clientpositive/tez/dynpart_sort_optimization.q.out ql/src/test/results/clientpositive/tez/dynpart_sort_optimization.q.out index e2f8673..852aacd 100644 --- ql/src/test/results/clientpositive/tez/dynpart_sort_optimization.q.out +++ ql/src/test/results/clientpositive/tez/dynpart_sort_optimization.q.out @@ -1381,6 +1381,175 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator +PREHOOK: query: explain insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from over1k where t is null or t=27 group by si,i,b,f,t +PREHOOK: type: QUERY +POSTHOOK: query: explain insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from over1k where t is null or t=27 group by si,i,b,f,t +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over1k + Statistics: Num rows: 4443 Data size: 106636 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (t is null or (t = 27)) (type: boolean) + Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) + outputColumnNames: si, i, b, f, t + Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + sort order: +++++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: smallint), KEY._col1 (type: int), KEY._col2 (type: bigint), KEY._col3 (type: float), KEY._col4 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.over1k_part2 + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: + ds foo + t + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.over1k_part2 + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: -- tests for HIVE-8162, only partition column 't' should be in last RS operator +explain insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from over1k where t is null or t=27 group by si,i,b,f,t +PREHOOK: type: QUERY +POSTHOOK: query: -- tests for HIVE-8162, only partition column 't' should be in last RS operator +explain insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from over1k where t is null or t=27 group by si,i,b,f,t +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over1k + Statistics: Num rows: 4443 Data size: 106636 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (t is null or (t = 27)) (type: boolean) + Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) + outputColumnNames: si, i, b, f, t + Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + sort order: +++++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: smallint), KEY._col1 (type: int), KEY._col2 (type: bigint), KEY._col3 (type: float), KEY._col4 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col4 (type: tinyint) + sort order: + + Map-reduce partition columns: _col4 (type: tinyint) + Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + Reducer 3 + Reduce Operator Tree: + Extract + Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.over1k_part2 + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: + ds foo + t + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.over1k_part2 + + Stage: Stage-3 + Stats-Aggr Operator + PREHOOK: query: insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from over1k where t is null or t=27 order by i PREHOOK: type: QUERY PREHOOK: Input: default@over1k