From c7e25038ae908354c1b7d488580a24faa5b41cde Mon Sep 17 00:00:00 2001 From: Ashutosh Chauhan Date: Fri, 16 Sep 2016 18:15:26 -0700 Subject: [PATCH] HIVE-14783 : bucketing column should be part of sorting for delete/update operation when spdo is on --- .../ql/optimizer/SortedDynPartitionOptimizer.java | 3 ++- .../dynpart_sort_optimization_acid.q.out | 20 ++++++++++---------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java index c743bda..8b4af72 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java @@ -40,6 +40,7 @@ import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.exec.Utilities.ReduceField; import org.apache.hadoop.hive.ql.io.AcidUtils; +import org.apache.hadoop.hive.ql.io.AcidUtils.Operation; import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; import org.apache.hadoop.hive.ql.lib.Dispatcher; @@ -440,7 +441,7 @@ public ReduceSinkOperator getReduceSinkOp(List partitionPositions, int numPartAndBuck = partitionPositions.size(); keyColsPosInVal.addAll(partitionPositions); - if (!bucketColumns.isEmpty()) { + if (!bucketColumns.isEmpty() || writeType == Operation.DELETE || writeType == Operation.UPDATE) { keyColsPosInVal.add(-1); numPartAndBuck += 1; } diff --git a/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out b/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out index ed3f8e9..1838d6a 100644 --- a/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out +++ b/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out @@ -416,8 +416,8 @@ STAGE PLANS: outputColumnNames: _col0, _col3 Statistics: Num rows: 892 Data size: 2676 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col3 (type: string), _col0 (type: struct) - sort order: ++ + key expressions: _col3 (type: string), '_bucket_number' (type: string), _col0 (type: struct) + sort order: +++ Map-reduce partition columns: _col3 (type: string) Statistics: Num rows: 892 Data size: 2676 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: @@ -1036,8 +1036,8 @@ STAGE PLANS: outputColumnNames: _col0, _col4 Statistics: Num rows: 1517 Data size: 4551 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: '2008-04-08' (type: string), _col4 (type: int), _col0 (type: struct) - sort order: +++ + key expressions: '2008-04-08' (type: string), _col4 (type: int), '_bucket_number' (type: string), _col0 (type: struct) + sort order: ++++ Map-reduce partition columns: '2008-04-08' (type: string), _col4 (type: int) Statistics: Num rows: 1517 Data size: 4551 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: @@ -1146,8 +1146,8 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 2979 Data size: 8937 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string), _col2 (type: int), _col0 (type: struct) - sort order: +++ + key expressions: _col1 (type: string), _col2 (type: int), '_bucket_number' (type: string), _col0 (type: struct) + sort order: ++++ Map-reduce partition columns: _col1 (type: string), _col2 (type: int) Statistics: Num rows: 2979 Data size: 8937 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: @@ -1320,8 +1320,8 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col3, _col4 Statistics: Num rows: 23 Data size: 2322 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col3 (type: string), _col4 (type: int), _col0 (type: struct) - sort order: +++ + key expressions: _col3 (type: string), _col4 (type: int), '_bucket_number' (type: string), _col0 (type: struct) + sort order: ++++ Map-reduce partition columns: _col3 (type: string), _col4 (type: int) Statistics: Num rows: 23 Data size: 2322 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), 'bar' (type: string) @@ -1400,8 +1400,8 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col3, _col4 Statistics: Num rows: 45 Data size: 4550 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col3 (type: string), _col4 (type: int), _col0 (type: struct) - sort order: +++ + key expressions: _col3 (type: string), _col4 (type: int), '_bucket_number' (type: string), _col0 (type: struct) + sort order: ++++ Map-reduce partition columns: _col3 (type: string), _col4 (type: int) Statistics: Num rows: 45 Data size: 4550 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), 'bar' (type: string) -- 1.7.12.4 (Apple Git-37)