From ee9ac1accc8029baf9090cc34fa9394ca1b63542 Mon Sep 17 00:00:00 2001
From: Gopal V
Date: Thu, 30 Jan 2020 20:53:59 -0800
Subject: [PATCH] HIVE-22796: ACID: Update/Delete operations are implicitly bucketed by 2^12 buckets

---
 .../ql/optimizer/SortedDynPartitionOptimizer.java | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java
index 7b3fa372d8..02a867816d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java
@@ -46,6 +46,7 @@
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.exec.Utilities.ReduceField;
 import org.apache.hadoop.hive.ql.io.AcidUtils;
+import org.apache.hadoop.hive.ql.io.BucketCodec;
 import org.apache.hadoop.hive.ql.io.RecordIdentifier;
 import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
 import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
@@ -617,9 +618,18 @@ public ReduceSinkOperator getReduceSinkOp(List partitionPositions,
     ReduceSinkDesc rsConf = new ReduceSinkDesc(keyCols, keyCols.size(), valCols,
         keyColNames, distinctColumnIndices, valColNames, -1, partCols, -1, keyTable,
         valueTable, writeType);
+
+    final boolean acidOp = writeType == AcidUtils.Operation.UPDATE
+        || writeType == AcidUtils.Operation.DELETE;
+    if (numBuckets == -1 && acidOp) {
+      // this enables _bucket_number UDF to do an identity transform over
+      // UDFToInteger(ROW__ID) to get the bucket number out as is
+      // since the modulo by max bucket-id is a no-op
+      numBuckets = (BucketCodec.MAX_BUCKET_ID+1); // start from 0
+    }
+
     rsConf.setBucketCols(bucketColumns);
     rsConf.setNumBuckets(numBuckets);
-
     ArrayList signature = new ArrayList<>();
     for (int index = 0; index < parent.getSchema().getSignature().size(); index++) {
       ColumnInfo colInfo = new ColumnInfo(parent.getSchema().getSignature().get(index));