diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/BucketCodec.java b/ql/src/java/org/apache/hadoop/hive/ql/io/BucketCodec.java index eb9ded7e78..786e2a4da6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/BucketCodec.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/BucketCodec.java @@ -17,6 +17,8 @@ */ package org.apache.hadoop.hive.ql.io; +import com.google.common.base.Preconditions; + /** * This class makes sense of {@link RecordIdentifier#getBucketProperty()}. Up until ASF Hive 3.0 this * field was simply the bucket ID. Since 3.0 it does bit packing to store several things: @@ -85,21 +87,25 @@ public int decodeStatementId(int bucketProperty) { return (bucketProperty & 0b0000_0000_0000_0000_0000_1111_1111_1111); } @Override - public int encode(AcidOutputFormat.Options options) { - int statementId = options.getStatementId() >= 0 ? options.getStatementId() : 0; + public int encode(final AcidOutputFormat.Options options) { + final int statementId = options.getStatementId(); + final int bucketId = options.getBucketId(); + + Preconditions.checkArgument(bucketId >= 0 && bucketId <= MAX_BUCKET_ID, "bucketId out of range: " + bucketId); - assert this.version >=0 && this.version <= MAX_VERSION - : "Version out of range: " + version; - if(!(options.getBucketId() >= 0 && options.getBucketId() <= MAX_BUCKET_ID)) { - throw new IllegalArgumentException("bucketId out of range: " + options.getBucketId()); - } - if(!(statementId >= 0 && statementId <= MAX_STATEMENT_ID)) { - throw new IllegalArgumentException("statementId out of range: " + statementId); - } - return this.version << (1 + NUM_BUCKET_ID_BITS + 4 + NUM_STATEMENT_ID_BITS) | - options.getBucketId() << (4 + NUM_STATEMENT_ID_BITS) | statementId; + /* + * This can be set to -1 to make the system generate old style + * (delta_xxxx_yyyy) file names. This is primarily needed for testing to + * make sure 1.3 code can still read files created by older code. + */ + Preconditions.checkArgument(statementId >= -1 && statementId <= MAX_STATEMENT_ID, + "statementId out of range: " + statementId); + + return this.version << (1 + NUM_BUCKET_ID_BITS + 4 + NUM_STATEMENT_ID_BITS) + | options.getBucketId() << (4 + NUM_STATEMENT_ID_BITS) | Math.max(0, statementId); } }; + private static final int TOP3BITS_MASK = 0b1110_0000_0000_0000_0000_0000_0000_0000; private static final int NUM_VERSION_BITS = 3; private static final int NUM_BUCKET_ID_BITS = 12; @@ -108,28 +114,32 @@ public int encode(AcidOutputFormat.Options options) { public static final int MAX_BUCKET_ID = (1 << NUM_BUCKET_ID_BITS) - 1; private static final int MAX_STATEMENT_ID = (1 << NUM_STATEMENT_ID_BITS) - 1; - public static BucketCodec determineVersion(int bucket) { + public static BucketCodec determineVersion(final int bucket) { assert 7 << 29 == BucketCodec.TOP3BITS_MASK; - //look at top 3 bits and return appropriate enum try { + // look at top 3 bits and return appropriate enum return getCodec((BucketCodec.TOP3BITS_MASK & bucket) >>> 29); - } - catch(IllegalArgumentException ex) { - throw new IllegalArgumentException(ex.getMessage() + " Cannot decode version from " + bucket); + } catch (IllegalArgumentException iae) { + throw new IllegalArgumentException("Cannot decode version from bucket number: " + Integer.toHexString(bucket), + iae); } } - public static BucketCodec getCodec(int version) { + + public static BucketCodec getCodec(final int version) { switch (version) { - case 0: - return BucketCodec.V0; - case 1: - return BucketCodec.V1; - default: - throw new IllegalArgumentException("Illegal 'bucket' format. Version=" + version); + case 0: + return BucketCodec.V0; + case 1: + return BucketCodec.V1; + default: + throw new IllegalArgumentException("Illegal 'bucket' format. Version=" + version); } } + final int version; + BucketCodec(int version) { + Preconditions.checkArgument(version >= 0 && version <= MAX_VERSION, "Version out of range: " + version); this.version = version; }