diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/BucketCodec.java b/ql/src/java/org/apache/hadoop/hive/ql/io/BucketCodec.java index eb9ded7e78..10d96048d5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/BucketCodec.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/BucketCodec.java @@ -17,6 +17,8 @@ */ package org.apache.hadoop.hive.ql.io; +import com.google.common.base.Preconditions; + /** * This class makes sense of {@link RecordIdentifier#getBucketProperty()}. Up until ASF Hive 3.0 this * field was simply the bucket ID. Since 3.0 it does bit packing to store several things: @@ -86,50 +88,50 @@ public int decodeStatementId(int bucketProperty) { } @Override public int encode(AcidOutputFormat.Options options) { - int statementId = options.getStatementId() >= 0 ? options.getStatementId() : 0; + final int statementId = options.getStatementId(); + final int bucketId = options.getBucketId(); + + Preconditions.checkArgument(bucketId >= 0 && bucketId <= MAX_BUCKET_ID, "Bucket ID out of range: " + bucketId); + Preconditions.checkArgument(statementId >= -1 && statementId <= MAX_STATEMENT_ID, + "Statement ID out of range: " + statementId); - assert this.version >=0 && this.version <= MAX_VERSION - : "Version out of range: " + version; - if(!(options.getBucketId() >= 0 && options.getBucketId() <= MAX_BUCKET_ID)) { - throw new IllegalArgumentException("bucketId out of range: " + options.getBucketId()); - } - if(!(statementId >= 0 && statementId <= MAX_STATEMENT_ID)) { - throw new IllegalArgumentException("statementId out of range: " + statementId); - } - return this.version << (1 + NUM_BUCKET_ID_BITS + 4 + NUM_STATEMENT_ID_BITS) | - options.getBucketId() << (4 + NUM_STATEMENT_ID_BITS) | statementId; + return this.version << (1 + NUM_BUCKET_ID_BITS + 4 + NUM_STATEMENT_ID_BITS) + | options.getBucketId() << (4 + NUM_STATEMENT_ID_BITS) | Math.max(0, statementId); } }; private static final int TOP3BITS_MASK = 0b1110_0000_0000_0000_0000_0000_0000_0000; private static final int NUM_VERSION_BITS = 3; private static final int NUM_BUCKET_ID_BITS = 12; private static final int NUM_STATEMENT_ID_BITS = 12; - private static final int MAX_VERSION = (1 << NUM_VERSION_BITS) - 1; + public static final int MAX_VERSION = (1 << NUM_VERSION_BITS) - 1; public static final int MAX_BUCKET_ID = (1 << NUM_BUCKET_ID_BITS) - 1; - private static final int MAX_STATEMENT_ID = (1 << NUM_STATEMENT_ID_BITS) - 1; + public static final int MAX_STATEMENT_ID = (1 << NUM_STATEMENT_ID_BITS) - 1; public static BucketCodec determineVersion(int bucket) { - assert 7 << 29 == BucketCodec.TOP3BITS_MASK; - //look at top 3 bits and return appropriate enum try { + // look at top 3 bits and return appropriate enum return getCodec((BucketCodec.TOP3BITS_MASK & bucket) >>> 29); - } - catch(IllegalArgumentException ex) { - throw new IllegalArgumentException(ex.getMessage() + " Cannot decode version from " + bucket); + } catch (IllegalArgumentException iae) { + throw new IllegalArgumentException("Cannot decode version from bucket number: " + Integer.toHexString(bucket), + iae); } } + public static BucketCodec getCodec(int version) { switch (version) { - case 0: - return BucketCodec.V0; - case 1: - return BucketCodec.V1; - default: - throw new IllegalArgumentException("Illegal 'bucket' format. Version=" + version); + case 0: + return BucketCodec.V0; + case 1: + return BucketCodec.V1; + default: + throw new IllegalArgumentException("Illegal 'bucket' format. Version=" + version); } } + final int version; + BucketCodec(int version) { + Preconditions.checkPositionIndex(version, MAX_VERSION, "Version out of range: " + version); this.version = version; } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/TestBucketCodec.java b/ql/src/test/org/apache/hadoop/hive/ql/io/TestBucketCodec.java new file mode 100644 index 0000000000..a5b843fa98 --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/TestBucketCodec.java @@ -0,0 +1,101 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.io; + +import static org.junit.Assert.assertEquals; + +import org.junit.Test; + +public class TestBucketCodec { + + /** + * There are only 2 valid codec [0,1]. + */ + @Test(expected = IllegalArgumentException.class) + public void testGetBucketCodecInvalidVersion() { + BucketCodec.getCodec(4); + } + + /** + * Test Bucket Codec version 0. This is the "legacy" version. + */ + @Test + public void testGetBucketCodecVersion0() { + final BucketCodec codec = BucketCodec.getCodec(0); + assertEquals(BucketCodec.V0, codec); + assertEquals(0, codec.getVersion()); + + // Returns the provided value + assertEquals(7, codec.decodeWriterId(7)); + + // Always returns 0 + assertEquals(0, codec.decodeStatementId(100)); + assertEquals(0, codec.decodeStatementId(-10)); + + // Returns the bucket value from Options + final AcidOutputFormat.Options options = new AcidOutputFormat.Options(null).bucket(7); + assertEquals(7, codec.encode(options)); + } + + /** + * Test Bucket Codec version 1. Represents format of "bucket" property in Hive + * 3.0. + */ + @Test + public void testGetBucketCodecVersion1() { + final BucketCodec codec = BucketCodec.getCodec(1); + assertEquals(BucketCodec.V1, codec); + assertEquals(1, codec.getVersion()); + + assertEquals(2748, codec.decodeWriterId(0x0ABC0000)); + + assertEquals(2748, codec.decodeStatementId(0x00000ABC)); + + final AcidOutputFormat.Options options = new AcidOutputFormat.Options(null).bucket(7).statementId(16); + assertEquals(537329680, codec.encode(options)); + + // Statement ID of -1 is acceptable and has the same affect as a value of 0 + final AcidOutputFormat.Options optionsNeg = new AcidOutputFormat.Options(null).bucket(7).statementId(-1); + final AcidOutputFormat.Options optionsZero = new AcidOutputFormat.Options(null).bucket(7).statementId(0); + assertEquals(codec.encode(optionsZero), codec.encode(optionsNeg)); + } + + @Test(expected = IllegalArgumentException.class) + public void testGetBucketCodecVersion1EncodeNegativeBucketId() { + BucketCodec.getCodec(1).encode(new AcidOutputFormat.Options(null).bucket(-1).statementId(16)); + } + + @Test(expected = IllegalArgumentException.class) + public void testGetBucketCodecVersion1EncodeMaxBucketId() { + BucketCodec.getCodec(1) + .encode(new AcidOutputFormat.Options(null).bucket(BucketCodec.MAX_BUCKET_ID + 1).statementId(16)); + } + + @Test(expected = IllegalArgumentException.class) + public void testGetBucketCodecVersion1EncodeNegativeStatementId() { + // A value of "-1" is acceptable + BucketCodec.getCodec(1).encode(new AcidOutputFormat.Options(null).bucket(7).statementId(-2)); + } + + @Test(expected = IllegalArgumentException.class) + public void testGetBucketCodecVersion1EncodeMaxStatementId() { + BucketCodec.getCodec(1) + .encode(new AcidOutputFormat.Options(null).bucket(7).statementId(BucketCodec.MAX_STATEMENT_ID + 1)); + } + +}