From a891be8fa0980a465837cba7d27cc4060e4b2c43 Mon Sep 17 00:00:00 2001
From: Rishit Shroff
Date: Mon, 5 Jan 2015 17:21:55 -0800
Subject: [PATCH] [HBASE-12815] Remove HBase specific Data structures like
 KeyValue, WALEdit, HTable*

Summary:
OSS HBase has different versions of these data structures, and the current
module was retaining old copies from 0.89-fb. This diff removes the
dependencies on those 0.89-fb versions and abstracts out some of the pieces
the module still needs. The next diff will integrate it with the WALEdit
present in the OSS version.

Test Plan: mvn clean package

Reviewers: tedyu, fantasist, reddragon, eclark
---
 .../org/apache/hadoop/hbase/HColumnDescriptor.java | 1001 ---------
 .../java/org/apache/hadoop/hbase/HConstants.java   |  844 +------
 .../org/apache/hadoop/hbase/HTableDescriptor.java  |  792 -------
 .../java/org/apache/hadoop/hbase/KeyValue.java     | 2299 --------------------
 .../hbase/consensus/client/QuorumClient.java       |    8 +-
 .../consensus/client/QuorumLoadTestClient.java     |   26 +-
 .../consensus/client/QuorumThriftClientAgent.java  |    6 +-
 .../consensus/client/QuorumThriftClientCLI.java    |   12 -
 .../apache/hadoop/hbase/consensus/log/Edit.java    |   19 +
 .../hadoop/hbase/consensus/log/EditCodec.java      |   17 +
 .../hadoop/hbase/consensus/log/LogFileViewer.java  |   33 +-
 .../hadoop/hbase/consensus/log/QuorumEdit.java     |   54 +
 .../hbase/consensus/log/QuorumEditCodec.java       |  187 ++
 .../hadoop/hbase/consensus/quorum/QuorumAgent.java |  118 +-
 .../hadoop/hbase/consensus/quorum/QuorumInfo.java  |    2 +-
 .../hbase/consensus/quorum/RaftQuorumContext.java  |   12 +-
 .../hbase/consensus/server/ConsensusService.java   |    4 +-
 .../consensus/server/ConsensusServiceImpl.java     |    4 +-
 .../consensus/server/LocalConsensusServer.java     |   46 +-
 .../hadoop/hbase/consensus/util/RaftUtil.java      |    2 -
 .../hadoop/hbase/regionserver/wal/WALEdit.java     |  521 -----
 .../hadoop/hbase/consensus/LocalTestBed.java       |   44 +-
 .../hadoop/hbase/consensus/RaftTestUtil.java       |   51 +-
 .../consensus/ReplicationLoadForUnitTest.java      |    2 +-
 .../hbase/consensus/TestBasicPeerSeeding.java      |   21 +-
 .../hadoop/hbase/consensus/TestBasicPeerSlow.java  |    2 +-
 .../hbase/consensus/TestBasicQuorumCommit.java     |   17 +-
 .../hadoop/hbase/consensus/TestCommitDeadline.java |    6 +-
 .../hadoop/hbase/consensus/TestLogFileViewer.java  |   32 +-
 .../hbase/consensus/TestLogWriterAndReader.java    |   47 +-
 .../hbase/consensus/TestRandomAccessLog.java       |   23 +-
 .../hadoop/hbase/consensus/TestReadOnlyLog.java    |   30 +-
 .../fsm/TestAsyncStatesInRaftStateMachine.java     |   12 +-
 .../hbase/consensus/log/TestQuorumEditCodec.java   |   23 +
 34 files changed, 567 insertions(+), 5750 deletions(-)
 delete mode 100644 hbase-consensus/src/main/java/org/apache/hadoop/hbase/HColumnDescriptor.java
 delete mode 100644 hbase-consensus/src/main/java/org/apache/hadoop/hbase/HTableDescriptor.java
 delete mode 100644 hbase-consensus/src/main/java/org/apache/hadoop/hbase/KeyValue.java
 create mode 100644 hbase-consensus/src/main/java/org/apache/hadoop/hbase/consensus/log/Edit.java
 create mode 100644 hbase-consensus/src/main/java/org/apache/hadoop/hbase/consensus/log/EditCodec.java
 create mode 100644 hbase-consensus/src/main/java/org/apache/hadoop/hbase/consensus/log/QuorumEdit.java
 create mode 100644 hbase-consensus/src/main/java/org/apache/hadoop/hbase/consensus/log/QuorumEditCodec.java
 delete mode 100644 hbase-consensus/src/main/java/org/apache/hadoop/hbase/regionserver/wal/WALEdit.java
 create mode 100644 hbase-consensus/src/test/java/org/apache/hadoop/hbase/consensus/log/TestQuorumEditCodec.java

diff --git
a/hbase-consensus/src/main/java/org/apache/hadoop/hbase/HColumnDescriptor.java b/hbase-consensus/src/main/java/org/apache/hadoop/hbase/HColumnDescriptor.java deleted file mode 100644 index 0dbe7fd..0000000 --- a/hbase-consensus/src/main/java/org/apache/hadoop/hbase/HColumnDescriptor.java +++ /dev/null @@ -1,1001 +0,0 @@ -/** - * Copyright 2007 The Apache Software Foundation - * - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hbase; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hbase.io.ImmutableBytesWritable; -import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; -import org.apache.hadoop.hbase.io.hfile.Compression; -//import org.apache.hadoop.hbase.regionserver.StoreFile; -//import org.apache.hadoop.hbase.regionserver.StoreFile.BloomType; -import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.io.WritableComparable; - -import java.io.DataInput; -import java.io.DataOutput; -import java.io.IOException; -import java.util.*; - -/** - * An HColumnDescriptor contains information about a column family such as the - * number of versions, compression settings, etc. - * - * It is used as input when creating a table or adding a column. Once set, the - * parameters that specify a column cannot be changed without deleting the - * column and recreating it. If there is data stored in the column, it will be - * deleted when the column is deleted. - */ -public class HColumnDescriptor implements WritableComparable { - static final Log LOG = LogFactory.getLog(HColumnDescriptor.class); - // For future backward compatibility - - // Version 3 was when column names become byte arrays and when we picked up - // Time-to-live feature. Version 4 was when we moved to byte arrays, HBASE-82. - // Version 5 was when bloom filter descriptors were removed. - // Version 6 adds metadata as a map where keys and values are byte[]. - // Version 7 -- add new compression and hfile blocksize to HColumnDescriptor (HBASE-1217) - // Version 8 -- reintroduction of bloom filters, changed from boolean to enum - // Version 9 -- add data block encoding - // Version 10 -- add flash back time - private static final byte COLUMN_DESCRIPTOR_VERSION = (byte) 10; - - // These constants are used as FileInfo keys - public static final String COMPRESSION = "COMPRESSION"; - public static final String COMPRESSION_COMPACT = "COMPRESSION_COMPACT"; - public static final String ENCODE_ON_DISK = - "ENCODE_ON_DISK"; - public static final String DATA_BLOCK_ENCODING = - "DATA_BLOCK_ENCODING"; - public static final String BLOCKCACHE = "BLOCKCACHE"; - - /** - * The type of compression. 
- * @see org.apache.hadoop.io.SequenceFile.Writer - * @deprecated Compression now means which compression library - * rather than 'what' to compress. - */ - @Deprecated - public static enum CompressionType { - /** Do not compress records. */ - NONE, - /** Compress values only, each separately. */ - RECORD, - /** Compress sequences of records together in blocks. */ - BLOCK - } - - public static final String BLOCKSIZE = "BLOCKSIZE"; - public static final String LENGTH = "LENGTH"; - public static final String TTL = "TTL"; - // The amount of time in seconds in the past upto which we support FlashBack - // queries. Ex. 60 * 60 * 24 indicates we support FlashBack queries upto 1 day - // ago. - public static final String FLASHBACK_QUERY_LIMIT = "FLASHBACK_QUERY_LIMIT"; - public static final String BLOOMFILTER = "BLOOMFILTER"; - public static final String BLOOMFILTER_ERRORRATE = "BLOOMFILTER_ERRORRATE"; - public static final String FOREVER = "FOREVER"; - public static final String REPLICATION_SCOPE = "REPLICATION_SCOPE"; - public static final String ROWKEY_PREFIX_LENGTH_FOR_BLOOMFILTER = "ROWKEY_PREFIX_LENGTH"; - public static final String HFILEHISTOGRAM_BUCKET_COUNT = - "HFILEHISTOGRAM_BUCKET_COUNT"; - - /** - * Default compression type. - */ - public static final String DEFAULT_COMPRESSION = - Compression.Algorithm.NONE.getName(); - - /** - * Default value of the flag that enables data block encoding on disk, as - * opposed to encoding in cache only. We encode blocks everywhere by default, - * as long as {@link #DATA_BLOCK_ENCODING} is not NONE. - */ - public static final boolean DEFAULT_ENCODE_ON_DISK = true; - - /** Default data block encoding algorithm. */ - public static final String DEFAULT_DATA_BLOCK_ENCODING = - DataBlockEncoding.NONE.toString(); - - /** - * Default number of versions of a record to keep. - */ - public static final int DEFAULT_VERSIONS = 3; - - /** - * Default setting for whether to serve from memory or not. - */ - public static final boolean DEFAULT_IN_MEMORY = false; - - /** - * Default setting for whether to use a block cache or not. - */ - public static final boolean DEFAULT_BLOCKCACHE = true; - - /** - * Default size of blocks in files store to the filesytem. Use smaller for - * faster random-access at expense of larger indices (more memory consumption). - */ - public static final int DEFAULT_BLOCKSIZE = 1024; //HFile.DEFAULT_BLOCKSIZE; - -// /** -// * Default setting for whether or not to use bloomfilters. -// */ - public static final String DEFAULT_BLOOMFILTER = " "; // StoreFile.BloomType.NONE.toString(); - - /** - * Default setting for the RowKey Prefix Length for the Bloomfilter. - */ - public static final int DEFAULT_ROWKEY_PREFIX_LENGTH_FOR_BLOOM = -1; - - /** - * Default value for bloom filter error rate. - */ - public static final float DEFAULT_BLOOMFILTER_ERROR_RATE = 0.01f; - - - /** - * Default time to live of cell contents. - */ - public static final int DEFAULT_TTL = HConstants.FOREVER; - - /** - * Default flash back time. Flash back time is the number of seconds in the - * past upto which we support flash back queries. - */ - public static final int DEFAULT_FLASHBACK_QUERY_LIMIT = 0; - - /** - * Default scope. 
- */ - public static final int DEFAULT_REPLICATION_SCOPE = HConstants.REPLICATION_SCOPE_LOCAL; - - private final static Map DEFAULT_VALUES - = new HashMap(); - private final static Set RESERVED_KEYWORDS - = new HashSet(); - static { - DEFAULT_VALUES.put(BLOOMFILTER, DEFAULT_BLOOMFILTER); - DEFAULT_VALUES.put(BLOOMFILTER_ERRORRATE, - String.valueOf(DEFAULT_BLOOMFILTER_ERROR_RATE)); - DEFAULT_VALUES.put(REPLICATION_SCOPE, String.valueOf(DEFAULT_REPLICATION_SCOPE)); - DEFAULT_VALUES.put(HConstants.VERSIONS, String.valueOf(DEFAULT_VERSIONS)); - DEFAULT_VALUES.put(COMPRESSION, DEFAULT_COMPRESSION); - DEFAULT_VALUES.put(TTL, String.valueOf(DEFAULT_TTL)); - DEFAULT_VALUES.put(FLASHBACK_QUERY_LIMIT, - String.valueOf(DEFAULT_FLASHBACK_QUERY_LIMIT)); - DEFAULT_VALUES.put(BLOCKSIZE, String.valueOf(DEFAULT_BLOCKSIZE)); - DEFAULT_VALUES.put(HConstants.IN_MEMORY, String.valueOf(DEFAULT_IN_MEMORY)); - DEFAULT_VALUES.put(BLOCKCACHE, String.valueOf(DEFAULT_BLOCKCACHE)); - DEFAULT_VALUES.put(ENCODE_ON_DISK, - String.valueOf(DEFAULT_ENCODE_ON_DISK)); - DEFAULT_VALUES.put(DATA_BLOCK_ENCODING, - String.valueOf(DEFAULT_DATA_BLOCK_ENCODING)); - DEFAULT_VALUES.put(ROWKEY_PREFIX_LENGTH_FOR_BLOOMFILTER, - String.valueOf(DEFAULT_ROWKEY_PREFIX_LENGTH_FOR_BLOOM)); -// DEFAULT_VALUES.put(HFILEHISTOGRAM_BUCKET_COUNT, -// String.valueOf(HFileHistogram.DEFAULT_HFILEHISTOGRAM_BINCOUNT)); - for (String s : DEFAULT_VALUES.keySet()) { - RESERVED_KEYWORDS.add(new ImmutableBytesWritable(Bytes.toBytes(s))); - } - } - - /* - * Cache here the HCD value. - * Question: its OK to cache since when we're reenable, we create a new HCD? - */ - private volatile Integer blocksize = null; - - // Column family name - private byte [] name; - - // Column metadata - protected final Map values = - new HashMap(); - - /* - * Cache the max versions rather than calculate it every time. - */ - private int cachedMaxVersions = -1; - - /** - * Default constructor. Must be present for Writable. - */ - public HColumnDescriptor() { - this.name = null; - } - - /** - * Construct a column descriptor specifying only the family name - * The other attributes are defaulted. - * - * @param familyName Column family name. Must be 'printable' -- digit or - * letter -- and may not contain a : - */ - public HColumnDescriptor(final String familyName) { - this(Bytes.toBytes(familyName)); - } - - /** - * Construct a column descriptor specifying only the family name - * The other attributes are defaulted. - * - * @param familyName Column family name. Must be 'printable' -- digit or - * letter -- and may not contain a : - */ - public HColumnDescriptor(final byte [] familyName) { - this (familyName == null || familyName.length <= 0? - HConstants.EMPTY_BYTE_ARRAY: familyName, DEFAULT_VERSIONS, - DEFAULT_COMPRESSION, DEFAULT_IN_MEMORY, DEFAULT_BLOCKCACHE, - DEFAULT_TTL, DEFAULT_BLOOMFILTER); - } - - /** - * Constructor. - * Makes a deep copy of the supplied descriptor. - * Can make a modifiable descriptor from an UnmodifyableHColumnDescriptor. - * @param desc The descriptor. - */ - public HColumnDescriptor(HColumnDescriptor desc) { - super(); - this.name = desc.name.clone(); - for (Map.Entry e: - desc.values.entrySet()) { - this.values.put(e.getKey(), e.getValue()); - } - } - - /** - * Constructor - * @param familyName Column family name. 
Must be 'printable' -- digit or - * letter -- and may not contain a : - * @param maxVersions Maximum number of versions to keep - * @param compression Compression type - * @param inMemory If true, column data should be kept in an HRegionServer's - * cache - * @param blockCacheEnabled If true, MapFile blocks should be cached - * @param timeToLive Time-to-live of cell contents, in seconds - * (use HConstants.FOREVER for unlimited TTL) - * @param bloomFilter Bloom filter type for this column - * - * @throws IllegalArgumentException if passed a family name that is made of - * other than 'word' characters: i.e. [a-zA-Z_0-9] or contains - * a : - * @throws IllegalArgumentException if the number of versions is <= 0 - * @deprecated use {@link #HColumnDescriptor(String)} and setters - */ - @Deprecated - public HColumnDescriptor(final byte [] familyName, final int maxVersions, - final String compression, final boolean inMemory, - final boolean blockCacheEnabled, - final int timeToLive, final String bloomFilter) { - this(familyName, maxVersions, compression, inMemory, blockCacheEnabled, - DEFAULT_BLOCKSIZE, timeToLive, bloomFilter, DEFAULT_REPLICATION_SCOPE); - } - - /** - * Constructor - * @param familyName Column family name. Must be 'printable' -- digit or - * letter -- and may not contain a : - * @param maxVersions Maximum number of versions to keep - * @param compression Compression type - * @param inMemory If true, column data should be kept in an HRegionServer's - * cache - * @param blockCacheEnabled If true, MapFile blocks should be cached - * @param blocksize Block size to use when writing out storefiles. Use - * smaller block sizes for faster random-access at expense of larger indices - * (more memory consumption). Default is usually 64k. - * @param timeToLive Time-to-live of cell contents, in seconds - * (use HConstants.FOREVER for unlimited TTL) - * @param bloomFilter Bloom filter type for this column - * @param scope The scope tag for this column - * - * @throws IllegalArgumentException if passed a family name that is made of - * other than 'word' characters: i.e. [a-zA-Z_0-9] or contains - * a : - * @throws IllegalArgumentException if the number of versions is <= 0 - * @deprecated use {@link #HColumnDescriptor(String)} and setters - */ - @Deprecated - public HColumnDescriptor(final byte [] familyName, final int maxVersions, - final String compression, final boolean inMemory, - final boolean blockCacheEnabled, final int blocksize, - final int timeToLive, final String bloomFilter, final int scope) { - this(familyName, maxVersions, - compression, DEFAULT_ENCODE_ON_DISK, DEFAULT_DATA_BLOCK_ENCODING, - inMemory, blockCacheEnabled, blocksize, timeToLive, bloomFilter, - scope, DEFAULT_BLOOMFILTER_ERROR_RATE); - } - - /** - * Constructor - * @param familyName Column family name. Must be 'printable' -- digit or - * letter -- and may not contain a : - * @param maxVersions Maximum number of versions to keep - * @param compression Compression type - * @param encodeOnDisk whether to use the specified data block encoding - * on disk. If false, the encoding will be used in cache only. 
- * @param dataBlockEncoding data block encoding - * @param inMemory If true, column data should be kept in an HRegionServer's - * cache - * @param blockCacheEnabled If true, MapFile blocks should be cached - * @param blocksize - * @param timeToLive Time-to-live of cell contents, in seconds - * (use HConstants.FOREVER for unlimited TTL) - * @param bloomFilter Bloom filter type for this column - * @param scope The scope tag for this column - * @param bloomErrorRate Bloom filter error rate for this column - * @throws IllegalArgumentException if passed a family name that is made of - * other than 'word' characters: i.e. [a-zA-Z_0-9] or contains - * a : - * @throws IllegalArgumentException if the number of versions is <= 0 - * @deprecated use {@link #HColumnDescriptor(String)} and setters - */ - @Deprecated - public HColumnDescriptor(final byte[] familyName, - final int maxVersions, - final String compression, final boolean encodeOnDisk, - final String dataBlockEncoding, final boolean inMemory, - final boolean blockCacheEnabled, final int blocksize, - final int timeToLive, final String bloomFilter, final int scope, - float bloomErrorRate) { - isLegalFamilyName(familyName); - this.name = familyName; - - if (maxVersions <= 0) { - // TODO: Allow maxVersion of 0 to be the way you say "Keep all versions". - // Until there is support, consider 0 or < 0 -- a configuration error. - throw new IllegalArgumentException("Maximum versions must be positive"); - } - setMaxVersions(maxVersions); - setInMemory(inMemory); - setBlockCacheEnabled(blockCacheEnabled); - setTimeToLive(timeToLive); - setCompressionType(Compression.Algorithm. - valueOf(compression.toUpperCase())); - setEncodeOnDisk(encodeOnDisk); - setDataBlockEncoding(DataBlockEncoding. - valueOf(dataBlockEncoding.toUpperCase())); -// setBloomFilterType(StoreFile.BloomType. -// valueOf(bloomFilter.toUpperCase())); - setBloomFilterErrorRate(bloomErrorRate); - setBlocksize(blocksize); - setScope(scope); - } - - /** - * @param b Family name. - * @return b - * @throws IllegalArgumentException If not null and not a legitimate family - * name: i.e. 'printable' and ends in a ':' (Null passes are allowed because - * b can be null when deserializing). Cannot start with a '.' - * either. - */ - public static byte [] isLegalFamilyName(final byte [] b) { - if (b == null) { - return b; - } - if (b[0] == '.') { - throw new IllegalArgumentException("Family names cannot start with a " + - "period: " + Bytes.toString(b)); - } - for (int i = 0; i < b.length; i++) { - if (Character.isISOControl(b[i]) || b[i] == ':') { - throw new IllegalArgumentException("Illegal character <" + b[i] + - ">. Family names cannot contain control characters or colons: " + - Bytes.toString(b)); - } - } - return b; - } - - public void setName(byte[] name) { - this.name = name; - } - - /** - * @return Name of this column family - */ - public byte [] getName() { - return name; - } - - /** - * @return Name of this column family - */ - public String getNameAsString() { - return Bytes.toString(this.name); - } - - /** - * @param key The key. - * @return The value. - */ - public byte[] getValue(byte[] key) { - ImmutableBytesWritable ibw = values.get(new ImmutableBytesWritable(key)); - if (ibw == null) - return null; - return ibw.get(); - } - - /** - * @param key The key. - * @return The value as a string. - */ - public String getValue(String key) { - byte[] value = getValue(Bytes.toBytes(key)); - if (value == null) - return null; - return Bytes.toString(value); - } - - /** - * @return All values. 
- */ - public Map getValues() { - // shallow pointer copy - return Collections.unmodifiableMap(values); - } - - /** - * @param key The key. - * @param value The value. - * @return this (for chained invocation) - */ - public HColumnDescriptor setValue(byte[] key, byte[] value) { - values.put(new ImmutableBytesWritable(key), - new ImmutableBytesWritable(value)); - return this; - } - - /** - * @param key Key whose key and value we're to remove from HCD parameters. - */ - public void remove(final byte [] key) { - values.remove(new ImmutableBytesWritable(key)); - } - - /** - * @param key The key. - * @param value The value. - * @return this (for chained invocation) - */ - public HColumnDescriptor setValue(String key, String value) { - if (value == null) { - remove(Bytes.toBytes(key)); - } else { - setValue(Bytes.toBytes(key), Bytes.toBytes(value)); - } - return this; - } - - /** @return compression type being used for the column family */ - public Compression.Algorithm getCompression() { - String n = getValue(COMPRESSION); - if (n != null) { - return Compression.Algorithm.valueOf(n.toUpperCase()); - } else { - return Compression.Algorithm.valueOf(DEFAULT_COMPRESSION); - } - } - - /** @return maximum number of versions */ - public synchronized int getMaxVersions() { - if (this.cachedMaxVersions == -1) { - String value = getValue(HConstants.VERSIONS); - this.cachedMaxVersions = (value != null)? - Integer.valueOf(value).intValue(): DEFAULT_VERSIONS; - } - return this.cachedMaxVersions; - } - - /** - * @param maxVersions maximum number of versions - * @return this (for chained invocation) - */ - public HColumnDescriptor setMaxVersions(int maxVersions) { - setValue(HConstants.VERSIONS, Integer.toString(maxVersions)); - cachedMaxVersions = maxVersions; - return this; - } - - /** - * @return Blocksize. - */ - public synchronized int getBlocksize() { - if (this.blocksize == null) { - String value = getValue(BLOCKSIZE); - this.blocksize = (value != null)? - Integer.decode(value): Integer.valueOf(DEFAULT_BLOCKSIZE); - } - return this.blocksize.intValue(); - } - - /** - * @param s Blocksize to use when writing out storefiles/hfiles on this - * column family. - * @return this (for chained invocation) - */ - public HColumnDescriptor setBlocksize(int s) { - setValue(BLOCKSIZE, Integer.toString(s)); - this.blocksize = null; - return this; - } - - /** - * @return Compression type setting. - */ - public Compression.Algorithm getCompressionType() { - return getCompression(); - } - - /** - * Compression types supported in hbase. - * LZO is not bundled as part of the hbase distribution. - * See LZO Compression - * for how to enable it. - * @param type Compression type setting. 
- * @return this (for chained invocation) - */ - public HColumnDescriptor setCompressionType(Compression.Algorithm type) { - String compressionType = type.getName().toUpperCase(); - return setValue(COMPRESSION, compressionType); - } - - /** - * @param compressionTypeStr compression type as a string - * @return this (for chained invocation) - */ - public HColumnDescriptor setCompressionType(String compressionTypeStr) { - return setCompressionType( - Compression.Algorithm.valueOf(compressionTypeStr.toUpperCase())); - } - - /** @return data block encoding algorithm used on disk */ - public DataBlockEncoding getDataBlockEncodingOnDisk() { - String encodeOnDiskStr = getValue(ENCODE_ON_DISK); - boolean encodeOnDisk; - if (encodeOnDiskStr == null) { - encodeOnDisk = DEFAULT_ENCODE_ON_DISK; - } else { - encodeOnDisk = Boolean.valueOf(encodeOnDiskStr); - } - - if (!encodeOnDisk) { - // No encoding on disk. - return DataBlockEncoding.NONE; - } - return getDataBlockEncoding(); - } - - /** - * Set the flag indicating that we only want to encode data block in cache - * but not on disk. - * @return this (for chained invocation) - */ - public HColumnDescriptor setEncodeOnDisk(boolean encodeOnDisk) { - return setValue(ENCODE_ON_DISK, String.valueOf(encodeOnDisk)); - } - - /** - * @return the data block encoding algorithm used in block cache and - * optionally on disk - */ - public DataBlockEncoding getDataBlockEncoding() { - String type = getValue(DATA_BLOCK_ENCODING); - if (type == null) { - type = DEFAULT_DATA_BLOCK_ENCODING; - } - return DataBlockEncoding.valueOf(type); - } - - /** - * Set data block encoding algorithm used in block cache. - * @param type What kind of data block encoding will be used. - * @return this (for chained invocation) - */ - public HColumnDescriptor setDataBlockEncoding(DataBlockEncoding type) { - String name; - if (type != null) { - name = type.toString(); - } else { - name = DataBlockEncoding.NONE.toString(); - } - return setValue(DATA_BLOCK_ENCODING, name); - } - - /** - * @return True if we are to keep all in use HRegionServer cache. - */ - public boolean isInMemory() { - String value = getValue(HConstants.IN_MEMORY); - if (value != null) - return Boolean.valueOf(value).booleanValue(); - return DEFAULT_IN_MEMORY; - } - - /** - * @param inMemory True if we are to keep all values in the HRegionServer - * cache - * @return this (for chained invocation) - */ - public HColumnDescriptor setInMemory(boolean inMemory) { - return setValue(HConstants.IN_MEMORY, Boolean.toString(inMemory)); - } - - /** - * @return Time-to-live of cell contents, in seconds. - */ - public int getTimeToLive() { - String value = getValue(TTL); - return (value != null)? Integer.valueOf(value).intValue(): DEFAULT_TTL; - } - - /** - * @param timeToLive Time-to-live of cell contents, in seconds. - * @return this (for chained invocation) - */ - public HColumnDescriptor setTimeToLive(int timeToLive) { - return setValue(TTL, Integer.toString(timeToLive)); - } - - /** - * @return the time in seconds for how far back in the past we support flash - * back queries. - */ - public int getFlashBackQueryLimit() { - String value = getValue(FLASHBACK_QUERY_LIMIT); - return (value != null) ? Integer.valueOf(value).intValue() - : DEFAULT_FLASHBACK_QUERY_LIMIT; - } - - /** - * @param flashBackQueryLimit - * the time in seconds for how far back in the past we support flash - * back queries. 
- * @return this (for chained invocation) - */ - public HColumnDescriptor setFlashBackQueryLimit(int flashBackQueryLimit) { - if (flashBackQueryLimit < 0) { - throw new IllegalArgumentException( - "FlashBackQueryLimit cannot be negative"); - } - return setValue(FLASHBACK_QUERY_LIMIT, - Integer.toString(flashBackQueryLimit)); - } - - /** - * @return True if MapFile blocks should be cached. - */ - public boolean isBlockCacheEnabled() { - String value = getValue(BLOCKCACHE); - if (value != null) - return Boolean.valueOf(value).booleanValue(); - return DEFAULT_BLOCKCACHE; - } - - /** - * @param blockCacheEnabled True if MapFile blocks should be cached. - * @return this (for chained invocation) - */ - public HColumnDescriptor setBlockCacheEnabled(boolean blockCacheEnabled) { - return setValue(BLOCKCACHE, Boolean.toString(blockCacheEnabled)); - } - -// /** -// * @return bloom filter type used for new StoreFiles in ColumnFamily -// */ -// public StoreFile.BloomType getBloomFilterType() { -// String n = getValue(BLOOMFILTER); -// if (n == null) { -// n = DEFAULT_BLOOMFILTER; -// } -// return StoreFile.BloomType.valueOf(n.toUpperCase()); -// } - - public int getIntValueFromString(String key, int defaultValue, - String parseErrorMessage) { - String n = getValue(key); - int returnValue = defaultValue; - if (n != null) { - try { - returnValue = Integer.parseInt(n); - } catch (Throwable e) { - LOG.error("Invalid Input " + n + ". " + parseErrorMessage, e); - } - } - return returnValue; - } - - public int getHFileHistogramBucketCount() { - return 0; -// getIntValueFromString( -// HFILEHISTOGRAM_BUCKET_COUNT, -// HFileHistogram.DEFAULT_HFILEHISTOGRAM_BINCOUNT, -// "Cannot parse the histogram bin count"); - } - - /** - * @return the number of bytes as row key prefix for the bloom filter - */ - public int getRowPrefixLengthForBloom() { - return getIntValueFromString( - ROWKEY_PREFIX_LENGTH_FOR_BLOOMFILTER, - DEFAULT_ROWKEY_PREFIX_LENGTH_FOR_BLOOM, - "Cannot parse row key prefix length"); - } - - public void setRowKeyPrefixLengthForBloom(int prefixLength) { - if (prefixLength > 0) { - setValue(ROWKEY_PREFIX_LENGTH_FOR_BLOOMFILTER, String.valueOf(prefixLength)); - } - } - - public void setHFileHistogramBucketCount(int histogramBucketCount) { - if (histogramBucketCount > 0) { - setValue( - HFILEHISTOGRAM_BUCKET_COUNT, - String.valueOf(histogramBucketCount)); - } - } - -// /** -// * @param bt bloom filter type -// * @return this (for chained invocation) -// */ -// public HColumnDescriptor setBloomFilterType(final StoreFile.BloomType bt) { -// return setValue(BLOOMFILTER, bt.toString()); -// } - -// /** -// * @param bloomTypeStr bloom filter type as a string -// * @return this (for chained invocation) -// */ -// public HColumnDescriptor setBloomFilterType(String bloomTypeStr) { -// return setBloomFilterType(BloomType.valueOf(bloomTypeStr.toUpperCase())); -// } - - public void setBloomFilterErrorRate(float bloomErrorRate) { - setValue(BLOOMFILTER_ERRORRATE, Float.toString(bloomErrorRate)); - } - public float getBloomFilterErrorRate() { - String value = getValue(BLOOMFILTER_ERRORRATE); - return (value != null)? 
Float.valueOf(value).floatValue() : DEFAULT_BLOOMFILTER_ERROR_RATE; - } - - /** - * @return the scope tag - */ - public int getScope() { - String value = getValue(REPLICATION_SCOPE); - if (value != null) { - return Integer.valueOf(value).intValue(); - } - return DEFAULT_REPLICATION_SCOPE; - } - - /** - * @param scope the scope tag - * @return this (for chained invocation) - */ - public HColumnDescriptor setScope(int scope) { - return setValue(REPLICATION_SCOPE, Integer.toString(scope)); - } - - /** - * @see java.lang.Object#toString() - */ - @Override - public String toString() { - StringBuilder s = new StringBuilder(); - s.append('{'); - s.append(HConstants.NAME); - s.append(" => '"); - s.append(Bytes.toString(name)); - s.append("'"); - s.append(getValues(true)); - s.append('}'); - return s.toString(); - } - - public String toStringCustomizedValues() { - StringBuilder s = new StringBuilder(); - s.append('{'); - s.append(HConstants.NAME); - s.append(" => '"); - s.append(Bytes.toString(name)); - s.append("'"); - s.append(getValues(false)); - s.append('}'); - return s.toString(); - } - - private StringBuilder getValues(boolean printDefaults) { - StringBuilder s = new StringBuilder(); - - boolean hasAdvancedKeys = false; - - // print all reserved keys first - for (ImmutableBytesWritable k : values.keySet()) { - if (!RESERVED_KEYWORDS.contains(k)) { - hasAdvancedKeys = true; - continue; - } - String key = Bytes.toString(k.get()); - String value = Bytes.toString(values.get(k).get()); - if (printDefaults - || !DEFAULT_VALUES.containsKey(key) - || !DEFAULT_VALUES.get(key).equalsIgnoreCase(value)) { - s.append(", "); - s.append(key); - s.append(" => "); - s.append('\'').append(value).append('\''); - } - } - - // print all other keys as advanced options - if (hasAdvancedKeys) { - s.append(", "); - s.append(HConstants.CONFIG).append(" => "); - s.append('{'); - boolean printComma = false; - for (ImmutableBytesWritable k : values.keySet()) { - if (RESERVED_KEYWORDS.contains(k)) { - continue; - } - String key = Bytes.toString(k.get()); - String value = Bytes.toString(values.get(k).get()); - if (printComma) { - s.append(", "); - } - printComma = true; - s.append('\'').append(key).append('\''); - s.append(" => "); - s.append('\'').append(value).append('\''); - } - s.append('}'); - } - return s; - } - - public static Map getDefaultValues() { - return Collections.unmodifiableMap(DEFAULT_VALUES); - } - - /** - * @see java.lang.Object#equals(java.lang.Object) - */ - @Override - public boolean equals(Object obj) { - if (this == obj) { - return true; - } - if (obj == null) { - return false; - } - if (!(obj instanceof HColumnDescriptor)) { - return false; - } - return compareTo((HColumnDescriptor)obj) == 0; - } - - /** - * @see java.lang.Object#hashCode() - */ - @Override - public int hashCode() { - int result = Bytes.hashCode(this.name); - result ^= Byte.valueOf(COLUMN_DESCRIPTOR_VERSION).hashCode(); - result ^= values.hashCode(); - return result; - } - - // Writable - - public void readFields(DataInput in) throws IOException { - int version = in.readByte(); - if (version < 6) { - if (version <= 2) { - Text t = new Text(); - t.readFields(in); - this.name = t.getBytes(); -// if(KeyValue.getFamilyDelimiterIndex(this.name, 0, this.name.length) -// > 0) { -// this.name = stripColon(this.name); -// } - } else { - this.name = Bytes.readByteArray(in); - } - this.values.clear(); - setMaxVersions(in.readInt()); - int ordinal = in.readInt(); - setCompressionType(Compression.Algorithm.values()[ordinal]); - 
setInMemory(in.readBoolean()); -// setBloomFilterType(in.readBoolean() ? BloomType.ROW : BloomType.NONE); -// if (getBloomFilterType() != BloomType.NONE && version < 5) { -// // If a bloomFilter is enabled and the column descriptor is less than -// // version 5, we need to skip over it to read the rest of the column -// // descriptor. There are no BloomFilterDescriptors written to disk for -// // column descriptors with a version number >= 5 -// throw new UnsupportedClassVersionError(this.getClass().getName() + -// " does not support backward compatibility with versions older " + -// "than version 5"); -// } - if (version > 1) { - setBlockCacheEnabled(in.readBoolean()); - } - if (version > 2) { - setTimeToLive(in.readInt()); - } - } else { - // version 6+ - this.name = Bytes.readByteArray(in); - this.values.clear(); - int numValues = in.readInt(); - for (int i = 0; i < numValues; i++) { - ImmutableBytesWritable key = new ImmutableBytesWritable(); - ImmutableBytesWritable value = new ImmutableBytesWritable(); - key.readFields(in); - value.readFields(in); - -// // in version 8, the BloomFilter setting changed from bool to enum -// if (version < 8 && Bytes.toString(key.get()).equals(BLOOMFILTER)) { -// value.set(Bytes.toBytes( -// Boolean.getBoolean(Bytes.toString(value.get())) -// ? BloomType.ROW.toString() -// : BloomType.NONE.toString())); -// } - - values.put(key, value); - } - if (version == 6) { - // Convert old values. - setValue(COMPRESSION, Compression.Algorithm.NONE.getName()); - } - } - } - - public void write(DataOutput out) throws IOException { - out.writeByte(COLUMN_DESCRIPTOR_VERSION); - Bytes.writeByteArray(out, this.name); - out.writeInt(values.size()); - for (Map.Entry e: - values.entrySet()) { - e.getKey().write(out); - e.getValue().write(out); - } - } - - // Comparable - - public int compareTo(HColumnDescriptor o) { - int result = Bytes.compareTo(this.name, o.getName()); - if (result == 0) { - // punt on comparison for ordering, just calculate difference - result = this.values.hashCode() - o.values.hashCode(); - if (result < 0) - result = -1; - else if (result > 0) - result = 1; - } - return result; - } -} diff --git a/hbase-consensus/src/main/java/org/apache/hadoop/hbase/HConstants.java b/hbase-consensus/src/main/java/org/apache/hadoop/hbase/HConstants.java index 8e6acb7..97dae84 100644 --- a/hbase-consensus/src/main/java/org/apache/hadoop/hbase/HConstants.java +++ b/hbase-consensus/src/main/java/org/apache/hadoop/hbase/HConstants.java @@ -30,186 +30,12 @@ import org.apache.hadoop.io.nativeio.NativeIO; * HConstants holds a bunch of HBase-related constants */ public final class HConstants { - /** - * Status codes used for return values of bulk operations. - */ - public enum OperationStatusCode { - NOT_RUN, - SUCCESS, - FAILURE; - } - - /** long constant for zero */ - public static final Long ZERO_L = Long.valueOf(0L); - public static final String NINES = "99999999999999"; - public static final String ZEROES = "00000000000000"; - - // For migration - - /** name of version file */ - public static final String VERSION_FILE_NAME = "hbase.version"; - - /** - * Current version of file system. - * Version 4 supports only one kind of bloom filter. - * Version 5 changes versions in catalog table regions. - * Version 6 enables blockcaching on catalog tables. - * Version 7 introduces hfile -- hbase 0.19 to 0.20.. 
- */ - // public static final String FILE_SYSTEM_VERSION = "6"; - public static final String FILE_SYSTEM_VERSION = "7"; - - // Configuration parameters - - //TODO: Is having HBase homed on port 60k OK? - - /** Cluster is in distributed mode or not */ - public static final String CLUSTER_DISTRIBUTED = "hbase.cluster.distributed"; - - /** Cluster is standalone or pseudo-distributed */ - public static final String CLUSTER_IS_LOCAL = "false"; - - /** Cluster is fully-distributed */ - public static final String CLUSTER_IS_DISTRIBUTED = "true"; - - /** default host address */ - public static final String DEFAULT_HOST = "0.0.0.0"; - - /** Parameter name for port master listens on. */ - public static final String MASTER_PORT = "hbase.master.port"; - - /** default port that the master listens on */ - public static final int DEFAULT_MASTER_PORT = 60000; - - /** default port for master web api */ - public static final int DEFAULT_MASTER_INFOPORT = 60010; - - /** Configuration key for master web API port */ - public static final String MASTER_INFO_PORT = "hbase.master.info.port"; - - /** Parameter name for the master type being backup (waits for primary to go inactive). */ - public static final String MASTER_TYPE_BACKUP = "hbase.master.backup"; - - /** by default every master is a possible primary master unless the conf explicitly overrides it */ - public static final boolean DEFAULT_MASTER_TYPE_BACKUP = false; - - /** Configuration key for enabling table-level locks for schema changes */ - public static final String MASTER_SCHEMA_CHANGES_LOCK_ENABLE = - "hbase.master.schemaChanges.lock.enable"; - - /** by default we should enable table-level locks for schema changes */ - public static final boolean DEFAULT_MASTER_SCHEMA_CHANGES_LOCK_ENABLE = true; - - /** Configuration key for time out for schema modification locks */ - public static final String MASTER_SCHEMA_CHANGES_LOCK_TIMEOUT_MS = - "hbase.master.schemaChanges.lock.timeout.ms"; - - public static final int DEFAULT_MASTER_SCHEMA_CHANGES_LOCK_TIMEOUT_MS = - 60 * 1000; - - /** Configuration key for time out for schema modification try lock */ - public static final String MASTER_SCHEMA_CHANGES_TRY_LOCK_TIMEOUT_MS = - "hbase.master.schemaChanges.trylock.timeout.ms"; - - public static final int DEFAULT_MASTER_SCHEMA_CHANGES_TRY_LOCK_TIMEOUT_MS = - 5 * 1000; - - /** Configuration key for for schema modification wait interval. */ - public static final String MASTER_SCHEMA_CHANGES_WAIT_INTERVAL_MS = - "hbase.regionserver.alterTable.waitInterval.ms"; - - public static final int DEFAULT_MASTER_SCHEMA_CHANGES_WAIT_INTERVAL_MS = - 1000; - - /** Configuration key for for schema modification max concurrent regions closed. */ - public static final String MASTER_SCHEMA_CHANGES_MAX_CONCURRENT_REGION_CLOSE = - "hbase.regionserver.alterTable.maxConcurrentClose"; - - public static final int DEFAULT_MASTER_SCHEMA_CHANGES_MAX_CONCURRENT_REGION_CLOSE = - 5; - - /** Name of ZooKeeper quorum configuration parameter. */ - public static final String ZOOKEEPER_QUORUM = "hbase.zookeeper.quorum"; - - /** Name of ZooKeeper config file in conf/ directory. */ - public static final String ZOOKEEPER_CONFIG_NAME = "zoo.cfg"; - - /** Common prefix of ZooKeeper configuration properties */ - public static final String ZK_CFG_PROPERTY_PREFIX = - "hbase.zookeeper.property."; - - public static final int ZK_CFG_PROPERTY_PREFIX_LEN = - ZK_CFG_PROPERTY_PREFIX.length(); - - /** Parameter name for number of times to retry writes to ZooKeeper. 
*/ - public static final String ZOOKEEPER_RETRIES = "zookeeper.retries"; - - /** Parameter name for the strategy whether aborting the process - * when zookeeper session expired. - */ - public static final String ZOOKEEPER_SESSION_EXPIRED_ABORT_PROCESS = - "hbase.zookeeper.sessionExpired.abortProcess"; - - /** Parameter name for number of times to retry to connection to ZooKeeper. */ - public static final String ZOOKEEPER_CONNECTION_RETRY_NUM = - "zookeeper.connection.retry.num"; - - /** - * The ZK client port key in the ZK properties map. The name reflects the - * fact that this is not an HBase configuration key. - */ - public static final String CLIENT_PORT_STR = "clientPort"; - - /** Parameter name for the client port that the zookeeper listens on */ - public static final String ZOOKEEPER_CLIENT_PORT = - ZK_CFG_PROPERTY_PREFIX + CLIENT_PORT_STR; - - /** Default number of times to retry writes to ZooKeeper. */ - public static final int DEFAULT_ZOOKEEPER_RETRIES = 5; - - /** Parameter name for ZooKeeper session time out.*/ - public static final String ZOOKEEPER_SESSION_TIMEOUT = - "zookeeper.session.timeout"; - - /** Default value for ZooKeeper session time out. */ - public static final int DEFAULT_ZOOKEEPER_SESSION_TIMEOUT = 60 * 1000; - - /** Parameter name for ZooKeeper pause between retries. In milliseconds. */ - public static final String ZOOKEEPER_PAUSE = "zookeeper.pause"; - /** Default ZooKeeper pause value. In milliseconds. */ - public static final int DEFAULT_ZOOKEEPER_PAUSE = 2 * 1000; - - /** default client port that the zookeeper listens on */ - public static final int DEFAULT_ZOOKEPER_CLIENT_PORT = 2181; - - /** Parameter name for the root dir in ZK for this cluster */ - public static final String ZOOKEEPER_ZNODE_PARENT = "zookeeper.znode.parent"; - - public static final String DEFAULT_ZOOKEEPER_ZNODE_PARENT = "/hbase"; - /** Parameter name for port region server listens on. */ public static final String REGIONSERVER_PORT = "hbase.regionserver.port"; /** Default port region server listens on. */ public static final int DEFAULT_REGIONSERVER_PORT = 60020; - /** default port for region server web api */ - public static final int DEFAULT_REGIONSERVER_INFOPORT = 60030; - - /** A configuration key for regionserver info port */ - public static final String REGIONSERVER_INFO_PORT = - "hbase.regionserver.info.port"; - - /** A flag that enables automatic selection of regionserver info port */ - public static final String REGIONSERVER_INFO_PORT_AUTO = - REGIONSERVER_INFO_PORT + ".auto"; - - /** Parameter name for what region server interface to use. */ - public static final String REGION_SERVER_CLASS = "hbase.regionserver.class"; - - /** Parameter name for what region server implementation to use. */ - public static final String REGION_SERVER_IMPL= "hbase.regionserver.impl"; - /** Parameter name for whether region server is running in the hydrabase mode. */ public static final String HYDRABASE = "hbase.hydrabase"; @@ -222,262 +48,7 @@ public final class HConstants { public static final boolean DEFAULT_HYDRABASE = false; -//Amit: For Raft this is not needed: /** Default region server interface class name. 
*/ -//Amit: For Raft this is not needed: public static final String DEFAULT_REGION_SERVER_CLASS = HRegionInterface.class.getName(); - - /** Parameter name for enabling regionChecker */ - public static final String REGION_CHECKER_ENABLED = "hbase.master.regionchecker.enabled"; - /** Default value for enabling regionChecker */ - public static final Boolean DEFAULT_REGION_CHECKER_ENABLED = false; - - /** Parameter name for what compaction manager to use. */ - public static final String COMPACTION_MANAGER_CLASS = "hbase.compactionmanager.class"; - - /** Parameter name for the number of large compaction threads */ - public static final String LARGE_COMPACTION_THREADS = - "hbase.regionserver.thread.compaction.large"; - - /** Default number of large compaction threads */ - public static final int DEFAULT_LARGE_COMPACTION_THREADS = 1; - - /** Parameter name for the number of large compaction threads */ - public static final String SMALL_COMPACTION_THREADS = - "hbase.regionserver.thread.compaction.small"; - - /** Default number of small compaction threads */ - public static final int DEFAULT_SMALL_COMPACTION_THREADS = 1; - - /** Prefix for Compaction related configurations in Store */ - public static final String HSTORE_COMPACTION_PREFIX = - "hbase.hstore.compaction."; - - /** Parameter name for the number of split threads */ - public static final String SPLIT_THREADS = "hbase.regionserver.thread.split"; - - /** Default number of split threads */ - public static final int DEFAULT_SPLIT_THREADS = 1; - - /** Parameter name for what master implementation to use. */ - public static final String MASTER_IMPL = "hbase.master.impl"; - - /** Parameter name for how often threads should wake up */ - public static final String THREAD_WAKE_FREQUENCY = "hbase.server.thread.wakefrequency"; - - /** Parameter name for how often a region should should perform a major compaction */ - public static final String MAJOR_COMPACTION_PERIOD = "hbase.hregion.majorcompaction"; - - /** Parameter name for HBase instance root directory */ - public static final String HBASE_DIR = "hbase.rootdir"; - - /** Parameter name for explicit region placement */ - public static final String LOAD_BALANCER_IMPL = "hbase.loadbalancer.impl"; - - /** Used to construct the name of the log directory for a region server - * Use '.' 
as a special character to seperate the log files from table data */ - public static final String HREGION_LOGDIR_NAME = ".logs"; - - /** Like the previous, but for old logs that are about to be deleted */ - public static final String HREGION_OLDLOGDIR_NAME = ".oldlogs"; - - /** Boolean config to determine if we should use a subdir structure - * in the .oldlogs directory */ - public static final String HREGION_OLDLOGDIR_USE_SUBDIR_STRUCTURE = - "hbase.regionserver.oldlogs.use.subdir.structure"; - - /** Boolean config to determine if we should use a subdir structure in - * the .oldlogs directory by default */ - public static final boolean HREGION_OLDLOGDIR_USE_SUBDIR_STRUCTURE_DEFAULT = - true; - - /** Used to construct the name of the compaction directory during compaction */ - public static final String HREGION_COMPACTIONDIR_NAME = "compaction.dir"; - - /** Conf key for the max file size after which we split the region */ - public static final String HREGION_MAX_FILESIZE = - "hbase.hregion.max.filesize"; - - /** File Extension used while splitting an HLog into regions (HBASE-2312) */ - public static final String HLOG_SPLITTING_EXT = "-splitting"; - - /** - * The max number of threads used for opening and closing stores or store - * files in parallel - */ - public static final String HSTORE_OPEN_AND_CLOSE_THREADS_MAX = - "hbase.hstore.open.and.close.threads.max"; - - /** - * The default number for the max number of threads used for opening and - * closing stores or store files in parallel - */ - public static final int DEFAULT_HSTORE_OPEN_AND_CLOSE_THREADS_MAX = 8; - - /** - * The max number of threads used for opening and closing regions - * in parallel - */ - public static final String HREGION_OPEN_AND_CLOSE_THREADS_MAX = - "hbase.region.open.and.close.threads.max"; - - /** - * The default number for the max number of threads used for opening and - * closing regions in parallel - */ - public static final int DEFAULT_HREGION_OPEN_AND_CLOSE_THREADS_MAX = 20; - - /** - * The max number of threads used for splitting logs - * in parallel - */ - public static final String HREGIONSERVER_SPLITLOG_WORKERS_NUM = - "hbase.hregionserver.hlog.split.workers.num"; - - /** - * If using quorum reads from HDFS, the maximum size of the thread pool. - * value <= 0 disables quorum reads. - */ - public static final String HDFS_QUORUM_READ_THREADS_MAX = - "hbase.dfsclient.quorum.reads.threads.max"; - - /** - * The default number for the size of thread pool used in quorum reads. - * value <= 0 disables quorum reads. - */ - public static final int DEFAULT_HDFS_QUORUM_READ_THREADS_MAX = 50; - - /** - * If using quorum reads from HDFS, the timeout of using another region server. 
- */ - public static final String HDFS_QUORUM_READ_TIMEOUT_MILLIS = - "hbase.dfsclient.quorum.reads.timeout"; - public static final long DEFAULT_HDFS_QUORUM_READ_TIMEOUT_MILLIS = 0; - - /** Default maximum file size */ - public static final long DEFAULT_MAX_FILE_SIZE = 256 * 1024 * 1024; - - /** Default minimum number of files to be compacted */ - public static final int DEFAULT_MIN_FILES_TO_COMPACT = 3; - - /** Default value for files without minFlushTime in metadata */ - public static final long NO_MIN_FLUSH_TIME = -1; - - /** Conf key for the memstore size at which we flush the memstore */ - public static final String HREGION_MEMSTORE_FLUSH_SIZE = - "hbase.hregion.memstore.flush.size"; - - /** Conf key for enabling Per Column Family flushing of memstores */ - public static final String HREGION_MEMSTORE_PER_COLUMN_FAMILY_FLUSH = - "hbase.hregion.memstore.percolumnfamilyflush.enabled"; - - /** Default value for the Per Column Family flush knob */ - public static final Boolean DEFAULT_HREGION_MEMSTORE_PER_COLUMN_FAMILY_FLUSH = - false; - - /** - * If Per Column Family flushing is enabled, this is the minimum size - * at which a column family's memstore is flushed. - */ - public static final String HREGION_MEMSTORE_COLUMNFAMILY_FLUSH_SIZE = - "hbase.hregion.memstore.percolumnfamilyflush.flush.size"; - - public static final String HREGION_MEMSTORE_BLOCK_MULTIPLIER = - "hbase.hregion.memstore.block.multiplier"; - public static final String HREGION_MEMSTORE_WAIT_ON_BLOCK = - "hbase.hregion.memstore.block.waitonblock"; - - /** Default size of a reservation block */ - public static final int DEFAULT_SIZE_RESERVATION_BLOCK = 1024 * 1024 * 5; - - /** Maximum value length, enforced on KeyValue construction */ - public static final int MAXIMUM_VALUE_LENGTH = Integer.MAX_VALUE; - - /** Conf key for enabling/disabling server profiling */ - public static final String HREGIONSERVER_ENABLE_SERVERSIDE_PROFILING = - "hbase.regionserver.enable.serverside.profiling"; - - /** Conf key for the preload blocks count if preloading is enabled for some scanner */ - public static final String SCAN_PRELOAD_BLOCK_COUNT = - "hbase.regionserver.preload.block.count"; - /** Default number of blocks to preload during sequential scan of hfile (if enabled)*/ - public static final int DEFAULT_PRELOAD_BLOCK_COUNT = 64; - /** Conf key for the core preload threads */ - public static final String CORE_PRELOAD_THREAD_COUNT = "hbase.regionserver.core.preload.thread.count"; - /** Default number of core preload threads per region server */ - public static final int DEFAULT_CORE_PRELOAD_THREAD_COUNT = 1; - /** Conf key for the max preload threads */ - public static final String MAX_PRELOAD_THREAD_COUNT = "hbase.regionserver.max.preload.thread.count"; - /** Defualt number of core preload threads per region server */ - public static final int DEFAULT_MAX_PRELOAD_THREAD_COUNT = 64; - /** Conf key for max preload blocks kept in cache per hfilescanner */ - public static final String MAX_PRELOAD_BLOCKS_KEPT_IN_CACHE = - "hbase.regionserver.preload.blocks.kept.in.cache"; - /** Default maximum number of preload blocks to keep in block cache per hfilescanner */ - public static final int DEFAULT_MAX_PRELOAD_BLOCKS_KEPT_IN_CACHE = 128; - - // Always store the location of the root table's HRegion. - // This HRegion is never split. - - - // region name = table + startkey + regionid. This is the row key. - // each row in the root and meta tables describes exactly 1 region - // Do we ever need to know all the information that we are storing? 
- - // Note that the name of the root table starts with "-" and the name of the - // meta table starts with "." Why? it's a trick. It turns out that when we - // store region names in memory, we use a SortedMap. Since "-" sorts before - // "." (and since no other table name can start with either of these - // characters, the root region will always be the first entry in such a Map, - // followed by all the meta regions (which will be ordered by their starting - // row key as well), followed by all user tables. So when the Master is - // choosing regions to assign, it will always choose the root region first, - // followed by the meta regions, followed by user regions. Since the root - // and meta regions always need to be on-line, this ensures that they will - // be the first to be reassigned if the server(s) they are being served by - // should go down. - - - // - // New stuff. Making a slow transition. - // - - /** The root table's name.*/ - public static final byte [] ROOT_TABLE_NAME = Bytes.toBytes("-ROOT-"); - - /** The META table's name. */ - public static final byte [] META_TABLE_NAME = Bytes.toBytes(".META."); - - /** delimiter used between portions of a region name */ - public static final int META_ROW_DELIMITER = ','; - - /** The catalog family as a string*/ - public static final String CATALOG_FAMILY_STR = "info"; - - /** The catalog family */ - public static final byte [] CATALOG_FAMILY = Bytes.toBytes(CATALOG_FAMILY_STR); - - /** The catalog historian family */ - public static final byte [] CATALOG_HISTORIAN_FAMILY = Bytes.toBytes("historian"); - - /** The regioninfo column qualifier */ - public static final byte [] REGIONINFO_QUALIFIER = Bytes.toBytes("regioninfo"); - - /** The server column qualifier */ - public static final byte [] SERVER_QUALIFIER = Bytes.toBytes("server"); - - /** The startcode column qualifier */ - public static final byte [] STARTCODE_QUALIFIER = Bytes.toBytes("serverstartcode"); - - /** The lower-half split region column qualifier */ - public static final byte [] SPLITA_QUALIFIER = Bytes.toBytes("splitA"); - - /** The upper-half split region column qualifier */ - public static final byte [] SPLITB_QUALIFIER = Bytes.toBytes("splitB"); - - /** The favored nodes column qualifier*/ - public static final byte [] FAVOREDNODES_QUALIFIER = Bytes.toBytes("favorednodes"); - // Other constants - /** * An empty instance. */ @@ -513,312 +84,13 @@ public final class HConstants { /** When we encode strings, we always specify UTF8 encoding */ public static final String UTF8_ENCODING = "UTF-8"; - /** - * Timestamp to use when we want to refer to the latest cell. - * This is the timestamp sent by clients when no timestamp is specified on - * commit. - */ - public static final long LATEST_TIMESTAMP = Long.MAX_VALUE; - - /** - * Timestamp to use when we want to refer to the oldest cell. - */ - public static final long OLDEST_TIMESTAMP = Long.MIN_VALUE; - - /** - * LATEST_TIMESTAMP in bytes form - */ - public static final byte [] LATEST_TIMESTAMP_BYTES = Bytes.toBytes(LATEST_TIMESTAMP); - /** - * Define for 'return-all-versions'. - */ - public static final int ALL_VERSIONS = Integer.MAX_VALUE; - - /** - * Unlimited time-to-live. - */ -// public static final int FOREVER = -1; - public static final int FOREVER = Integer.MAX_VALUE; - - /** - * Seconds in a week - */ - public static final int WEEK_IN_SECONDS = 7 * 24 * 3600; - - //TODO: although the following are referenced widely to format strings for - // the shell. They really aren't a part of the public API. 
It would be - // nice if we could put them somewhere where they did not need to be - // public. They could have package visibility - public static final String NAME = "NAME"; - public static final String VERSIONS = "VERSIONS"; - public static final String IN_MEMORY = "IN_MEMORY"; - public static final String CONFIG = "CONFIG"; - - /** - * This is a retry backoff multiplier table similar to the BSD TCP syn - * backoff table, a bit more aggressive than simple exponential backoff. - */ public static int RETRY_BACKOFF[] = { 1, 1, 1, 2, 2, 4, 4, 8, 16, 32 }; public static final String REGION_IMPL = "hbase.hregion.impl"; - /** modifyTable op for replacing the table descriptor */ - public static enum Modify { - CLOSE_REGION, - MOVE_REGION, - TABLE_COMPACT, - TABLE_FLUSH, - TABLE_MAJOR_COMPACT, - TABLE_SET_HTD, - TABLE_SPLIT, - TABLE_EXPLICIT_SPLIT - } - - /** - * Scope tag for locally scoped data. - * This data will not be replicated. - */ - public static final int REPLICATION_SCOPE_LOCAL = 0; - - /** - * Scope tag for globally scoped data. - * This data will be replicated to all peers. - */ - public static final int REPLICATION_SCOPE_GLOBAL = 1; - - /** - * Default cluster ID, cannot be used to identify a cluster so a key with - * this value means it wasn't meant for replication. - */ - public static final byte DEFAULT_CLUSTER_ID = 0; - - /** - * Parameter name for maximum number of bytes returned when calling a - * scanner's next method. - */ - public static final String HBASE_CLIENT_SCANNER_MAX_RESULT_SIZE_KEY = "hbase.client.scanner.max.result.size"; - - /** - * Parameter name for the number of threads for the ParallelScanner - */ - public static final String HBASE_CLIENT_PARALLEL_SCANNER_THREAD = - "hbase.client.parallel.scanner.thread"; - - /** - * The default number of threads for the ParallelScanner - */ - public static final int HBASE_CLIENT_PARALLEL_SCANNER_THREAD_DEFAULT = 100; - - /** - * Maximum number of bytes returned when calling a scanner's next method. - * Note that when a single row is larger than this limit the row is still - * returned completely. - * - * The default value is unlimited. - */ - public static final long DEFAULT_HBASE_CLIENT_SCANNER_MAX_RESULT_SIZE = Long.MAX_VALUE; - - - /** - * Maximum number of bytes returned when calling a scanner's next method. - * Used with partialRow parameter on the client side. Note that when a - * single row is larger than this limit, the row is still returned completely - * if partialRow is true, otherwise, the row will be truncated in order to - * fit the memory. - */ - public static final int DEFAULT_HBASE_SCANNER_MAX_RESULT_SIZE = Integer.MAX_VALUE; - - /** - * HRegion server lease period in milliseconds. Clients must report in within this period - * else they are considered dead. Unit measured in ms (milliseconds). - */ - public static final String HBASE_REGIONSERVER_LEASE_PERIOD_KEY = "hbase.regionserver.lease.period"; - - - /** - * Default value of {@link #HBASE_REGIONSERVER_LEASE_PERIOD_KEY}. 
- */ - public static final long DEFAULT_HBASE_REGIONSERVER_LEASE_PERIOD = 60000; - - /** - * timeout for each RPC - */ - public static final String HBASE_RPC_TIMEOUT_KEY = "hbase.rpc.timeout"; - public static final String HBASE_RS_REPORT_TIMEOUT_KEY = "hbase.regionserverReport.timeout"; - - /** - * Default value of {@link #HBASE_RPC_TIMEOUT_KEY} - */ - public static final int DEFAULT_HBASE_RPC_TIMEOUT = 60000; - public static final int DEFAULT_RS_REPORT_TIMEOUT = 3000; - - /** - * pause between rpc or connect retries - */ - public static final String HBASE_CLIENT_PAUSE = "hbase.client.pause"; - public static final int DEFAULT_HBASE_CLIENT_PAUSE = 1000; - - /** - * compression for each RPC and its default value - */ - public static final String HBASE_RPC_COMPRESSION_KEY = "hbase.rpc.compression"; - public static final Compression.Algorithm DEFAULT_HBASE_RPC_COMPRESSION = - Compression.Algorithm.NONE; - - public static final String - REPLICATION_ENABLE_KEY = "hbase.replication"; - - /** - * Configuration key for the size of the block cache - */ - public static final String HFILE_BLOCK_CACHE_SIZE_KEY = - "hfile.block.cache.size"; - - public static final float HFILE_BLOCK_CACHE_SIZE_DEFAULT = 0.25f; - - /** The delay when re-trying a socket operation in a loop (HBASE-4712) */ - public static final int SOCKET_RETRY_WAIT_MS = 200; - - /** Host name of the local machine */ - public static final String LOCALHOST = "localhost"; - - public static final String LOCALHOST_IP = "127.0.0.1"; - - /** Conf key that enables distributed log splitting */ - public static final String DISTRIBUTED_LOG_SPLITTING_KEY = - "hbase.master.distributed.log.splitting"; - - public static final int REGION_SERVER_MSG_INTERVAL = 1 * 1000; - - /** The number of favored nodes for each region */ - public static final int FAVORED_NODES_NUM = 3; - - public static final String UNKNOWN_RACK = "Unknown Rack"; - - /** Delay when waiting for a variable (HBASE-4712) */ - public static final int VARIABLE_WAIT_TIME_MS = 40; - - public static final String LOAD_BALANCER_SLOP_KEY = "hbase.regions.slop"; - - // Thrift server configuration options - - /** Configuration key prefix for the stand-alone thrift proxy */ - public static final String THRIFT_PROXY_PREFIX = "hbase.thrift."; - - /** Configuration key prefix for thrift server embedded into the region server */ - public static final String RS_THRIFT_PREFIX = "hbase.regionserver.thrift."; - - /** Default port for the stand-alone thrift proxy */ - public static final int DEFAULT_THRIFT_PROXY_PORT = 9090; - - /** Default port for the thrift server embedded into regionserver */ - public static final int DEFAULT_RS_THRIFT_SERVER_PORT = 9091; - - /** Configuration key suffix for thrift server type (e.g. thread pool, nonblocking, etc.) 
*/ - public static final String THRIFT_SERVER_TYPE_SUFFIX = "server.type"; - - /** Configuration key suffix for the IP address for thrift server to bind to */ - public static final String THRIFT_BIND_SUFFIX = "ipaddress"; - - /** Configuration key suffix for whether to use compact Thrift transport */ - public static final String THRIFT_COMPACT_SUFFIX = "compact"; - - /** Configuration key suffix for whether to use framed Thrift transport */ - public static final String THRIFT_FRAMED_SUFFIX = "framed"; - - /** Configuration key suffix for Thrift server port */ - public static final String THRIFT_PORT_SUFFIX = "port"; - - /** The number of HLogs for each region server */ - public static final String HLOG_CNT_PER_SERVER = "hbase.regionserver.hlog.cnt.perserver"; - - public static final String HLOG_FORMAT_BACKWARD_COMPATIBILITY = - "hbase.regionserver.hlog.format.backward.compatibility"; - - /** - * The byte array represents for NO_NEXT_INDEXED_KEY; - * The actual value is irrelevant because this is always compared by reference. - */ - public static final byte [] NO_NEXT_INDEXED_KEY = Bytes.toBytes("NO_NEXT_INDEXED_KEY"); - - public static final int MULTIPUT_SUCCESS = -1; - - public static final boolean[] BOOLEAN_VALUES = { false, true }; - - public static final int IPC_CALL_PARAMETER_LENGTH_MAX = 1000; - - /** - * Used in Configuration to get/set the KV aggregator - */ - public static final String KV_AGGREGATOR = "kvaggregator"; - - /** - * Used in Configuration to get/set the compaction hook - */ - public static final String COMPACTION_HOOK = "compaction_hook"; - - /** - * Absolute path of the external jar which will contain the custom compaction hook - */ - public static final String COMPACTION_HOOK_JAR = "compaction_hook_jar"; - - public static final String GENERAL_BLOOM_FILTER = "general_bloom_filter"; - - public static final String DELETE_FAMILY_BLOOM_FILTER = "delete_family_bloom_filter"; - - public static final String DELETE_COLUMN_BLOOM_FILTER = "delete_column_bloom_filter"; - - public static final String ROWKEY_PREFIX_BLOOM_FILTER = "rowkey_prefix_bloom_filter"; - - /** - * This will enable/disable the usage of delete col bloom filter. Note that - * this won't enable/disable the delete bloom filter for being written/read. - * In fact, we could read and write it but we will not use it when we scan - * data, thus we won't do the optimized reads. In order to disable/enable the - * filter for write&read both, use - * BloomFilterFactory.IO_STOREFILE_DELETEFAMILY_BLOOM_ENABLED - */ - public static final boolean USE_DELETE_COLUMN_BLOOM_FILTER = true; - public static final String USE_DELETE_COLUMN_BLOOM_FILTER_STRING = "use_delete_column_bloom_filter"; - - // Delaying the region server load balancing by the following amount for a - // load balancing where source is a favored region server. - public static final String HBASE_REGION_ASSIGNMENT_LOADBALANCER_WAITTIME_MS - = "hbase.master.assignment.load.balancer.waittime.ms"; - public static final int DEFAULT_HBASE_REGION_ASSIGNMENT_LOADBALANCER_WAITTIME_MS = 60000; - - /* - * This defines the number of buckets used for computing the histogram of - * pread latency. - */ - public static final String PREAD_LATENCY_HISTOGRAM_NUM_BUCKETS = - "hbase.histogrambasedmetric.numbuckets.preadlatency"; - - /* - * This defines the number of buckets used for computing the histogram of - * pread latency during compaction. 
- */ - public static final String PREAD_COMPACTION_LATENCY_HISTOGRAM_NUM_BUCKETS = - "hbase.histogrambasedmetric.numbuckets.preadcompactionlatency"; - public static final String HISTOGRAM_BASED_METRICS_WINDOW = - "hbase.histogrambasedmetric.window"; - /* - * This is the folder address for the hard links folder where the - * hard links are created during creating a read only store. - */ - public static final String READ_ONLY_HARDLINKS_FOLDER = - "hbase.store.readonly.hardlinks.folder"; - public static final String READ_ONLY_HARDLINKS_FOLDER_DEFAULT = - "/tmp/hardlinks/"; - - public static final String CLIENT_SOCKED_CLOSED_EXC_MSG = "Interrupting the read request"; - public static final String SERVER_INTERRUPTED_CALLS_KEY = "serverInterruptedCalls"; - public static final String RMAP_SUBSCRIPTION = "hbase.rmap.subscriptions"; - public static final String DEFAULT_RMAP_NAME = "rmap.json"; - /** * How much time to wait in HydraBaseAdmin.applyRMap() for the RMap to be * successfully applied. @@ -944,118 +216,6 @@ public final class HConstants { public static final long RAFT_BATCH_APPEND_TRY_CANDIDATE_LOGS_PROMOTION_THRESHOLD_DEFAULT = 1024L; - /** - * Controls whether we use one or two throttles to control the insert in the queue - */ - public static final String USE_MULTIPLE_THROTTLES = "hbase.server.multithrottler"; - - /** - * How much memory do we want for the blocking callqueue, used in HBaseServer - */ - public static final long MAX_CALL_QUEUE_MEMORY_SIZE = 1024*1024*1024; - public static final String MAX_CALL_QUEUE_MEMORY_SIZE_STRING = "max.callqueue.memory.size"; - - /** - * Used in HBase Server, when we use the multithrottler for the callQueue - */ - public static final long MAX_SMALLER_CALL_QUEUE_MEMORY_SIZE = 256*1024*1024; - public static final String MAX_SMALLER_CALL_QUEUE_MEMORY_SIZE_STRING = "max.smaller.callqueue.memory.size"; - public static final long MAX_LARGER_CALL_QUEUE_MEMORY_SIZE = 768*1024*1024; - public static final String MAX_LARGER_CALL_QUEUE_MEMORY_SIZE_STRING = "max.larger.callqueue.memory.size"; - public static final int SMALL_QUEUE_REQUEST_LIMIT = 25*1024*1024; - public static final String SMALL_QUEUE_REQUEST_LIMIT_STRING = "small.queue.request.limit"; - - // These are the IO priority values for various regionserver operations. Note - // that these are priorities relative to each other. See the man page for - // ioprio_set for more details. The default priority for a process with nice - // value 0 is 4. The priorities range from 0 (highest) to 7 (lowest). - // - // The thinking behind the various priorities are as follows : - // 1. PREAD priority is the highest since client reads are extremely critical. - // 2. Although HLOG sync is as important as a pread (since the client - // blocks on it.). But the HLOG sync never hits disk in the critical path - // and these priorities are when the kernel scheduler writes data to the - // persistent store. This priority will only be considered when we close the - // HLOG and help in reducing any stalls while closing the hlog. - // 3. The priority for flush is more than compaction since if we don't flush - // quickly enough, the memstore might grow too much and block client updates. - public static final int PREAD_PRIORITY = 0; - public static final int HLOG_PRIORITY = 1; - public static final int FLUSH_PRIORITY = 2; - public static final int COMPACT_PRIORITY = 3; - - // We use the Best Effort class always since RealTime and Idle are too - // extreme. Again check man pages for ioprio_set for more details. 
-// public static final int IOPRIO_CLASSOF_SERVICE = NativeIO.IOPRIO_CLASS_BE; - - public static final String HBASE_ENABLE_QOS_KEY = "hbase.enable.qos"; - public static final String HBASE_ENABLE_SYNCFILERANGE_THROTTLING_KEY = "hbase.enable.syncfilerange.throttling"; - - /* - * MSLAB Constants - */ - public final static String MSLAB_CHUNK_POOL_MAX_SIZE_KEY = "hbase.hregion.memstore.chunkpool.maxsize"; - public final static String MSLAB_CHUNK_POOL_INITIAL_SIZE_KEY = "hbase.hregion.memstore.chunkpool.initialsize"; - public final static float MSLAB_POOL_MAX_SIZE_DEFAULT = 0.0f; - public final static float MSLAB_POOL_INITIAL_SIZE_DEFAULT = 0.0f; - - public final static String MSLAB_CHUNK_SIZE_KEY = "hbase.hregion.memstore.mslab.chunksize"; - public final static int MSLAB_CHUNK_SIZE_DEFAULT = 2 * 1024 * 1024; - - public final static String MSLAB_MAX_ALLOC_KEY = "hbase.hregion.memstore.mslab.max.allocation"; - public final static int MSLAB_MAX_ALLOC_DEFAULT = 256 * 1024; // allocs bigger than this don't go through allocator - - public final static String MSLAB_MAX_SIZE_KEY = "hbase.hregion.memstore.mslab.max.size"; - public final static float MSLAB_MAX_SIZE_DEFAULT = 1.25f; // Stop using SLAB if larger than this percentage of memstore size - - public final static float MSLAB_PCT_LOWER_LIMIT = 0.0f; - public final static float MSLAB_PCT_UPPER_LIMIT = 2.0f; - - - /* - * Memstore Linear search limit - */ - public final static String MEMSTORE_RESEEK_LINEAR_SEARCH_LIMIT_KEY = "hbase.hregion.memstore.linear.search.limit"; - public final static int MEMSTORE_RESEEK_LINEAR_SEARCH_LIMIT_DEFAULT = 20; - - public static final String USE_MSLAB_KEY = "hbase.hregion.memstore.mslab.enabled"; - public static final boolean USE_MSLAB_DEFAULT = false; - - /** - * This wait time is used to periodically probe until - * we exhaust the timeout in the window - */ - public static final String WAIT_TIME_FOR_FLUSH_MS = - "hbase.hregion.flush.waittime"; - public static final long DEFAULT_WAIT_TIME_FOR_FLUSH_MS = 100; //ms - - /** - * The knob to turn on the ClientLocalScanner to flush and wait for the - * region flush to finish before it opens the store files. - * Set the socket timeout for the RPC appropriately for this. - */ - public static final String CLIENT_LOCAL_SCANNER_FLUSH_AND_WAIT = - "hbase.clientlocalscanner.flush.and.wait"; - public static final boolean DEFAULT_CLIENT_LOCAL_SCANNER_FLUSH_AND_WAIT = - false; - - /** - * The acceptable staleness of a flush. Say if this value is set to 10s, - * if there was a flush in the last 10s, we would not flush again. - */ - public static final String CLIENT_LOCAL_SCANNER_FLUSH_ACCEPTABLE_STALENESS_MS = - "hbase.clientlocalscanner.flush.acceptable.staleness"; - public static final long DEFAULT_CLIENT_LOCAL_SCANNER_FLUSH_ACCEPTABLE_STALENESS_MS = - 30000; // ms - - /** - * The extra wait time that we wait for the flush to take place. 
- */ - public static final String CLIENT_LOCAL_SCANNER_MAX_WAITTIME_FOR_FLUSH_MS = - "hbase.clientlocal.scanner.flush.maxwaittime"; - public static final int DEFAULT_CLIENT_LOCAL_SCANNER_MAX_WAITTIME_FOR_FLUSH_MS - = 10000; // ms - public static final String RAFT_TRANSACTION_LOG_DIRECTORY_KEY = "hbase.consensus.log.path"; @@ -1118,6 +278,10 @@ public final class HConstants { public static final byte QUORUM_MEMBERSHIP_CHANGE_VERSION = 1; + public static final byte QUORUM_EDIT_TYPE = 3; + + public static final byte QUORUM_EDIT_VERSION = 1; + public static final String CONSENSUS_TRANCTION_LOG_RETENTION_TIME_KEY = "hbase.consensus.log.retention.time"; diff --git a/hbase-consensus/src/main/java/org/apache/hadoop/hbase/HTableDescriptor.java b/hbase-consensus/src/main/java/org/apache/hadoop/hbase/HTableDescriptor.java deleted file mode 100644 index 380dfaa..0000000 --- a/hbase-consensus/src/main/java/org/apache/hadoop/hbase/HTableDescriptor.java +++ /dev/null @@ -1,792 +0,0 @@ -/** - * Copyright 2009 The Apache Software Foundation - * - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hbase; - -import java.io.DataInput; -import java.io.DataOutput; -import java.io.IOException; -import java.util.Collection; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.Map; -import java.util.Set; -import java.util.TreeMap; -import java.util.TreeSet; - -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hbase.io.ImmutableBytesWritable; -import org.apache.hadoop.hbase.io.hfile.Compression; -import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hadoop.io.WritableComparable; - -/** - * HTableDescriptor contains the name of an HTable, and its - * column families. - */ -public class HTableDescriptor implements WritableComparable { - - // Changes prior to version 3 were not recorded here. - // Version 3 adds metadata as a map where keys and values are byte[]. - // Version 4 adds indexes - // Version 5 removed transactional pollution -- e.g. 
indexes - public static final byte TABLE_DESCRIPTOR_VERSION = 5; - - private byte [] name = HConstants.EMPTY_BYTE_ARRAY; - private String nameAsString = ""; - - // Table metadata - protected final Map values = - new HashMap(); - - public static final String FAMILIES = "FAMILIES"; - public static final ImmutableBytesWritable FAMILIES_KEY = - new ImmutableBytesWritable(Bytes.toBytes(FAMILIES)); - public static final String MAX_FILESIZE = "MAX_FILESIZE"; - public static final ImmutableBytesWritable MAX_FILESIZE_KEY = - new ImmutableBytesWritable(Bytes.toBytes(MAX_FILESIZE)); - public static final String READONLY = "READONLY"; - public static final ImmutableBytesWritable READONLY_KEY = - new ImmutableBytesWritable(Bytes.toBytes(READONLY)); - public static final String MEMSTORE_FLUSHSIZE = "MEMSTORE_FLUSHSIZE"; - public static final ImmutableBytesWritable MEMSTORE_FLUSHSIZE_KEY = - new ImmutableBytesWritable(Bytes.toBytes(MEMSTORE_FLUSHSIZE)); - public static final String IS_ROOT = "IS_ROOT"; - public static final ImmutableBytesWritable IS_ROOT_KEY = - new ImmutableBytesWritable(Bytes.toBytes(IS_ROOT)); - public static final String IS_META = "IS_META"; - - public static final ImmutableBytesWritable IS_META_KEY = - new ImmutableBytesWritable(Bytes.toBytes(IS_META)); - - public static final String DEFERRED_LOG_FLUSH = "DEFERRED_LOG_FLUSH"; - public static final ImmutableBytesWritable DEFERRED_LOG_FLUSH_KEY = - new ImmutableBytesWritable(Bytes.toBytes(DEFERRED_LOG_FLUSH)); - - public static final String DISABLE_WAL = "DISABLE_WAL"; - public static final ImmutableBytesWritable DISABLE_WAL_KEY = - new ImmutableBytesWritable(Bytes.toBytes(DISABLE_WAL)); - - - // The below are ugly but better than creating them each time till we - // replace booleans being saved as Strings with plain booleans. Need a - // migration script to do this. TODO. - private static final ImmutableBytesWritable FALSE = - new ImmutableBytesWritable(Bytes.toBytes(Boolean.FALSE.toString())); - private static final ImmutableBytesWritable TRUE = - new ImmutableBytesWritable(Bytes.toBytes(Boolean.TRUE.toString())); - - public static final boolean DEFAULT_READONLY = false; - - public static final long DEFAULT_MEMSTORE_FLUSH_SIZE = 1024*1024*64L; - - public static final long DEFAULT_MEMSTORE_COLUMNFAMILY_FLUSH_SIZE = - 1024*1024*16L; - - public static final long DEFAULT_MAX_FILESIZE = 1024*1024*256L; - - public static final boolean DEFAULT_DEFERRED_LOG_FLUSH = true; - - private final static Map DEFAULT_VALUES - = new HashMap(); - private final static Set RESERVED_KEYWORDS - = new HashSet(); - static { - DEFAULT_VALUES.put(MAX_FILESIZE, String.valueOf(DEFAULT_MAX_FILESIZE)); - DEFAULT_VALUES.put(READONLY, String.valueOf(DEFAULT_READONLY)); - DEFAULT_VALUES.put(MEMSTORE_FLUSHSIZE, - String.valueOf(DEFAULT_MEMSTORE_FLUSH_SIZE)); - DEFAULT_VALUES.put(DEFERRED_LOG_FLUSH, - String.valueOf(DEFAULT_DEFERRED_LOG_FLUSH)); - for (String s : DEFAULT_VALUES.keySet()) { - RESERVED_KEYWORDS.add(new ImmutableBytesWritable(Bytes.toBytes(s))); - } - RESERVED_KEYWORDS.add(IS_ROOT_KEY); - RESERVED_KEYWORDS.add(IS_META_KEY); - } - - private volatile Boolean meta = null; - private volatile Boolean root = null; - private Boolean isDeferredLog = null; - - // Key is hash of the family name. - public final Map families = - new TreeMap(Bytes.BYTES_RAWCOMPARATOR); - - /** - * Private constructor used internally creating table descriptors for - * catalog tables: e.g. .META. and -ROOT-. 
- */ - public HTableDescriptor(final byte [] name, HColumnDescriptor[] families) { - this.name = name.clone(); - this.nameAsString = Bytes.toString(this.name); - setMetaFlags(name); - for(HColumnDescriptor descriptor : families) { - this.families.put(descriptor.getName(), descriptor); - } - } - - /** - * Private constructor used internally creating table descriptors for - * catalog tables: e.g. .META. and -ROOT-. - */ - protected HTableDescriptor(final byte [] name, HColumnDescriptor[] families, - Map values) { - this.name = name.clone(); - this.nameAsString = Bytes.toString(this.name); - setMetaFlags(name); - for(HColumnDescriptor descriptor : families) { - this.families.put(descriptor.getName(), descriptor); - } - for (Map.Entry entry: - values.entrySet()) { - this.values.put(entry.getKey(), entry.getValue()); - } - } - - - /** - * Constructs an empty object. - * For deserializing an HTableDescriptor instance only. - * @see #HTableDescriptor(byte[]) - */ - public HTableDescriptor() { - super(); - } - - /** - * Constructor. - * @param name Table name. - * @throws IllegalArgumentException if passed a table name - * that is made of other than 'word' characters, underscore or period: i.e. - * [a-zA-Z_0-9.]. - * @see HADOOP-1581 HBASE: Un-openable tablename bug - */ - public HTableDescriptor(final String name) { - this(Bytes.toBytes(name)); - } - - /** - * Constructor. - * @param name Table name. - * @throws IllegalArgumentException if passed a table name - * that is made of other than 'word' characters, underscore or period: i.e. - * [a-zA-Z_0-9-.]. - * @see HADOOP-1581 HBASE: Un-openable tablename bug - */ - public HTableDescriptor(final byte [] name) { - super(); - setMetaFlags(name); - this.name = this.isMetaRegion()? name: isLegalTableName(name); - this.nameAsString = Bytes.toString(this.name); - } - - /** - * Constructor. - *

- * Makes a deep copy of the supplied descriptor. - * Can make a modifiable descriptor from an UnmodifyableHTableDescriptor. - * @param desc The descriptor. - */ - public HTableDescriptor(final HTableDescriptor desc) { - super(); - this.name = desc.name.clone(); - this.nameAsString = Bytes.toString(this.name); - setMetaFlags(this.name); - for (HColumnDescriptor c: desc.families.values()) { - this.families.put(c.getName(), new HColumnDescriptor(c)); - } - for (Map.Entry e: - desc.values.entrySet()) { - this.values.put(e.getKey(), e.getValue()); - } - } - - /* - * Set meta flags on this table. - * Called by constructors. - * @param name - */ - private void setMetaFlags(final byte [] name) { - setRootRegion(Bytes.equals(name, HConstants.ROOT_TABLE_NAME)); - setMetaRegion(isRootRegion() || - Bytes.equals(name, HConstants.META_TABLE_NAME)); - } - - /** @return true if this is the root region */ - public boolean isRootRegion() { - if (this.root == null) { - this.root = isSomething(IS_ROOT_KEY, false)? Boolean.TRUE: Boolean.FALSE; - } - return this.root.booleanValue(); - } - - /** @param isRoot true if this is the root region */ - protected void setRootRegion(boolean isRoot) { - // TODO: Make the value a boolean rather than String of boolean. - values.put(IS_ROOT_KEY, isRoot? TRUE: FALSE); - } - - /** @return true if this is a meta region (part of the root or meta tables) */ - public boolean isMetaRegion() { - if (this.meta == null) { - this.meta = calculateIsMetaRegion(); - } - return this.meta.booleanValue(); - } - - private synchronized Boolean calculateIsMetaRegion() { - byte [] value = getValue(IS_META_KEY); - return (value != null)? Boolean.valueOf(Bytes.toString(value)): Boolean.FALSE; - } - - private boolean isSomething(final ImmutableBytesWritable key, - final boolean valueIfNull) { - byte [] value = getValue(key); - if (value != null) { - // TODO: Make value be a boolean rather than String of boolean. - return Boolean.valueOf(Bytes.toString(value)).booleanValue(); - } - return valueIfNull; - } - - /** - * @param isMeta true if this is a meta region (part of the root or meta - * tables) */ - protected void setMetaRegion(boolean isMeta) { - values.put(IS_META_KEY, isMeta? TRUE: FALSE); - } - - /** @return true if table is the meta table */ - public boolean isMetaTable() { - return isMetaRegion() && !isRootRegion(); - } - - /** - * Check passed buffer is legal user-space table name. - * @param b Table name. - * @return Returns passed b param - * @throws NullPointerException If passed b is null - * @throws IllegalArgumentException if passed a table name - * that is made of other than 'word' characters or underscores: i.e. - * [a-zA-Z_0-9]. - */ - public static byte [] isLegalTableName(final byte [] b) { - if (b == null || b.length <= 0) { - throw new IllegalArgumentException("Name is null or empty"); - } - if (b[0] == '.' || b[0] == '-') { - throw new IllegalArgumentException("Illegal first character <" + b[0] + - "> at 0. User-space table names can only start with 'word " + - "characters': i.e. [a-zA-Z_0-9]: " + Bytes.toString(b)); - } - for (int i = 0; i < b.length; i++) { - if (Character.isLetterOrDigit(b[i]) || b[i] == '_' || b[i] == '-' || - b[i] == '.') { - continue; - } - throw new IllegalArgumentException("Illegal character <" + b[i] + - "> at " + i + ". User-space table names can only contain " + - "'word characters': i.e. [a-zA-Z_0-9-.]: " + Bytes.toString(b)); - } - return b; - } - - /** - * @param key The key. - * @return The value. 
- */ - public byte[] getValue(byte[] key) { - return getValue(new ImmutableBytesWritable(key)); - } - - private byte[] getValue(final ImmutableBytesWritable key) { - ImmutableBytesWritable ibw = values.get(key); - if (ibw == null) - return null; - return ibw.get(); - } - - /** - * @param key The key. - * @return The value as a string. - */ - public String getValue(String key) { - byte[] value = getValue(Bytes.toBytes(key)); - if (value == null) - return null; - return Bytes.toString(value); - } - - /** - * @return All values. - */ - public Map getValues() { - // shallow pointer copy - return Collections.unmodifiableMap(values); - } - - /** - * @param key The key. - * @param value The value. - */ - public void setValue(byte[] key, byte[] value) { - setValue(new ImmutableBytesWritable(key), value); - } - - /* - * @param key The key. - * @param value The value. - */ - private void setValue(final ImmutableBytesWritable key, - final byte[] value) { - values.put(key, new ImmutableBytesWritable(value)); - } - - /* - * @param key The key. - * @param value The value. - */ - private void setValue(final ImmutableBytesWritable key, - final ImmutableBytesWritable value) { - values.put(key, value); - } - - /** - * @param key The key. - * @param value The value. - */ - public void setValue(String key, String value) { - if (value == null) { - remove(Bytes.toBytes(key)); - } else { - setValue(Bytes.toBytes(key), Bytes.toBytes(value)); - } - } - - /** - * @param key Key whose key and value we're to remove from HTD parameters. - */ - public void remove(final byte [] key) { - values.remove(new ImmutableBytesWritable(key)); - } - - /** - * @return true if all columns in the table should be read only - */ - public boolean isReadOnly() { - return isSomething(READONLY_KEY, DEFAULT_READONLY); - } - - /** - * @param readOnly True if all of the columns in the table should be read - * only. - */ - public void setReadOnly(final boolean readOnly) { - setValue(READONLY_KEY, readOnly? TRUE: FALSE); - } - - /** - * @return true if that table's log is hflush by other means - */ - public synchronized boolean isDeferredLogFlush() { - if(this.isDeferredLog == null) { - this.isDeferredLog = - isSomething(DEFERRED_LOG_FLUSH_KEY, DEFAULT_DEFERRED_LOG_FLUSH); - } - return this.isDeferredLog; - } - - /** - * @param isDeferredLogFlush true if that table's log is hlfush by oter means - * only. - */ - public void setDeferredLogFlush(final boolean isDeferredLogFlush) { - setValue(DEFERRED_LOG_FLUSH_KEY, isDeferredLogFlush? TRUE: FALSE); - } - - /** @return name of table */ - public byte [] getName() { - return name; - } - - /** @return name of table */ - public String getNameAsString() { - return this.nameAsString; - } - - /** @return max hregion size for table */ - public long getMaxFileSize() { - byte [] value = getValue(MAX_FILESIZE_KEY); - if (value != null) - return Long.valueOf(Bytes.toString(value)).longValue(); - return HConstants.DEFAULT_MAX_FILE_SIZE; - } - - /** @param name name of table */ - public void setName(byte[] name) { - this.name = name; - } - - /** - * @param maxFileSize The maximum file size that a store file can grow to - * before a split is triggered. 
- */ - public void setMaxFileSize(long maxFileSize) { - setValue(MAX_FILESIZE_KEY, Bytes.toBytes(Long.toString(maxFileSize))); - } - - /** - * @return memory cache flush size for each hregion - */ - public long getMemStoreFlushSize() { - byte [] value = getValue(MEMSTORE_FLUSHSIZE_KEY); - if (value != null) - return Long.valueOf(Bytes.toString(value)).longValue(); - return DEFAULT_MEMSTORE_FLUSH_SIZE; - } - - /** - * @param memstoreFlushSize memory cache flush size for each hregion - */ - public void setMemStoreFlushSize(long memstoreFlushSize) { - setValue(MEMSTORE_FLUSHSIZE_KEY, - Bytes.toBytes(Long.toString(memstoreFlushSize))); - } - - /** - * Adds a column family. - * @param family HColumnDescriptor of familyto add. - */ - public void addFamily(final HColumnDescriptor family) { - if (family.getName() == null || family.getName().length <= 0) { - throw new NullPointerException("Family name cannot be null or empty"); - } - this.families.put(family.getName(), family); - } - - /** - * Checks to see if this table contains the given column family - * @param c Family name or column name. - * @return true if the table contains the specified family name - */ - public boolean hasFamily(final byte [] c) { - return families.containsKey(c); - } - - /** - * @return Name of this table and then a map of all of the column family - * descriptors. - * @see #getNameAsString() - */ - @Override - public String toString() { - StringBuilder s = new StringBuilder(); - s.append('\'').append(Bytes.toString(name)).append('\''); - s.append(getValues(true)); - for (HColumnDescriptor f : families.values()) { - s.append(", ").append(f); - } - return s.toString(); - } - - public String toStringCustomizedValues() { - StringBuilder s = new StringBuilder(); - s.append('\'').append(Bytes.toString(name)).append('\''); - s.append(getValues(false)); - for(HColumnDescriptor hcd : families.values()) { - s.append(", ").append(hcd.toStringCustomizedValues()); - } - return s.toString(); - } - - private StringBuilder getValues(boolean printDefaults) { - StringBuilder s = new StringBuilder(); - - // step 1: set partitioning and pruning - Set reservedKeys = new TreeSet(); - Set configKeys = new TreeSet(); - for (ImmutableBytesWritable k : values.keySet()) { - if (!RESERVED_KEYWORDS.contains(k)) { - configKeys.add(k); - continue; - } - // only print out IS_ROOT/IS_META if true - String key = Bytes.toString(k.get()); - String value = Bytes.toString(values.get(k).get()); - if (key.equalsIgnoreCase(IS_ROOT) || key.equalsIgnoreCase(IS_META)) { - if (Boolean.valueOf(value) == false) continue; - } - if (printDefaults - || !DEFAULT_VALUES.containsKey(key) - || !DEFAULT_VALUES.get(key).equalsIgnoreCase(value)) { - reservedKeys.add(k); - } - } - - // early exit optimization - if (reservedKeys.isEmpty() && configKeys.isEmpty()) return s; - - // step 2: printing - s.append(", {METHOD => 'table_att'"); - - // print all reserved keys first - for (ImmutableBytesWritable k : reservedKeys) { - String key = Bytes.toString(k.get()); - String value = Bytes.toString(values.get(k).get()); - s.append(", "); - s.append(key); - s.append(" => "); - s.append('\'').append(value).append('\''); - } - - if (!configKeys.isEmpty()) { - // print all non-reserved, advanced config keys as a separate subset - s.append(", "); - s.append(HConstants.CONFIG).append(" => "); - s.append("{"); - boolean printComma = false; - for (ImmutableBytesWritable k : configKeys) { - String key = Bytes.toString(k.get()); - String value = Bytes.toString(values.get(k).get()); - if 
(printComma) s.append(", "); - printComma = true; - s.append('\'').append(key).append('\''); - s.append(" => "); - s.append('\'').append(value).append('\''); - } - s.append('}'); - } - - s.append('}'); // end METHOD - - return s; - } - - public static Map getDefaultValues() { - return Collections.unmodifiableMap(DEFAULT_VALUES); - } - - /** - * @see java.lang.Object#equals(java.lang.Object) - */ - @Override - public boolean equals(Object obj) { - if (this == obj) { - return true; - } - if (obj == null) { - return false; - } - if (!(obj instanceof HTableDescriptor)) { - return false; - } - return compareTo((HTableDescriptor)obj) == 0; - } - - /** - * @see java.lang.Object#hashCode() - */ - @Override - public int hashCode() { - int result = Bytes.hashCode(this.name); - result ^= Byte.valueOf(TABLE_DESCRIPTOR_VERSION).hashCode(); - if (this.families != null && this.families.size() > 0) { - for (HColumnDescriptor e: this.families.values()) { - result ^= e.hashCode(); - } - } - result ^= values.hashCode(); - return result; - } - - // Writable - - public void readFields(DataInput in) throws IOException { - int version = in.readInt(); - if (version < 3) - throw new IOException("versions < 3 are not supported (and never existed!?)"); - // version 3+ - name = Bytes.readByteArray(in); - nameAsString = Bytes.toString(this.name); - setRootRegion(in.readBoolean()); - setMetaRegion(in.readBoolean()); - values.clear(); - int numVals = in.readInt(); - for (int i = 0; i < numVals; i++) { - ImmutableBytesWritable key = new ImmutableBytesWritable(); - ImmutableBytesWritable value = new ImmutableBytesWritable(); - key.readFields(in); - value.readFields(in); - values.put(key, value); - } - families.clear(); - int numFamilies = in.readInt(); - for (int i = 0; i < numFamilies; i++) { - HColumnDescriptor c = new HColumnDescriptor(); - c.readFields(in); - families.put(c.getName(), c); - } - if (version < 4) { - return; - } - } - - public void write(DataOutput out) throws IOException { - out.writeInt(TABLE_DESCRIPTOR_VERSION); - Bytes.writeByteArray(out, name); - out.writeBoolean(isRootRegion()); - out.writeBoolean(isMetaRegion()); - out.writeInt(values.size()); - for (Map.Entry e: - values.entrySet()) { - e.getKey().write(out); - e.getValue().write(out); - } - out.writeInt(families.size()); - for(Iterator it = families.values().iterator(); - it.hasNext(); ) { - HColumnDescriptor family = it.next(); - family.write(out); - } - } - - // Comparable - - public int compareTo(final HTableDescriptor other) { - int result = Bytes.compareTo(this.name, other.name); - if (result == 0) { - result = families.size() - other.families.size(); - } - if (result == 0 && families.size() != other.families.size()) { - result = Integer.valueOf(families.size()).compareTo( - Integer.valueOf(other.families.size())); - } - if (result == 0) { - for (Iterator it = families.values().iterator(), - it2 = other.families.values().iterator(); it.hasNext(); ) { - result = it.next().compareTo(it2.next()); - if (result != 0) { - break; - } - } - } - if (result == 0) { - // punt on comparison for ordering, just calculate difference - result = this.values.hashCode() - other.values.hashCode(); - if (result < 0) - result = -1; - else if (result > 0) - result = 1; - } - return result; - } - - /** - * @return Immutable sorted map of families. - */ - public Collection getFamilies() { - return Collections.unmodifiableCollection(this.families.values()); - } - - /** - * @return Immutable sorted set of the keys of the families. 
- */ - public Set getFamiliesKeys() { - return Collections.unmodifiableSet(this.families.keySet()); - } - - public HColumnDescriptor[] getColumnFamilies() { - return getFamilies().toArray(new HColumnDescriptor[0]); - } - - /** - * @param column - * @return Column descriptor for the passed family name or the family on - * passed in column. - */ - public HColumnDescriptor getFamily(final byte [] column) { - return this.families.get(column); - } - - /** - * @param column - * @return Column descriptor for the passed family name or the family on - * passed in column. - */ - public HColumnDescriptor removeFamily(final byte [] column) { - return this.families.remove(column); - } - - /** - * @param rootdir qualified path of HBase root directory - * @param tableName name of table - * @return path for table - */ - public static Path getTableDir(Path rootdir, final byte [] tableName) { - return new Path(rootdir, Bytes.toString(tableName)); - } - - /** Table descriptor for -ROOT- catalog table */ - public static final HTableDescriptor ROOT_TABLEDESC = new HTableDescriptor( - HConstants.ROOT_TABLE_NAME, - new HColumnDescriptor[] { - new HColumnDescriptor(HConstants.CATALOG_FAMILY) - // Ten is arbitrary number. Keep versions to help debugging. - .setMaxVersions(10) - .setInMemory(true) - .setBlocksize(8 * 1024) - .setTimeToLive(HConstants.FOREVER) - .setScope(HConstants.REPLICATION_SCOPE_LOCAL) - }); - - /** Table descriptor for .META. catalog table */ - public static final HTableDescriptor META_TABLEDESC = new HTableDescriptor( - HConstants.META_TABLE_NAME, new HColumnDescriptor[] { - new HColumnDescriptor(HConstants.CATALOG_FAMILY) - // Ten is arbitrary number. Keep versions to help debugging. - .setMaxVersions(10) - .setInMemory(true) - .setBlocksize(8 * 1024) - .setScope(HConstants.REPLICATION_SCOPE_LOCAL), - new HColumnDescriptor(HConstants.CATALOG_HISTORIAN_FAMILY) - .setMaxVersions(HConstants.ALL_VERSIONS) - .setBlocksize(8 * 1024) - .setTimeToLive(HConstants.WEEK_IN_SECONDS) - .setScope(HConstants.REPLICATION_SCOPE_LOCAL) - }); - - /** - * @return true if all columns in the table should be read only - */ - public boolean isWALDisabled() { - return isSomething(DISABLE_WAL_KEY, false); - } - - /** - * @param readOnly True if all of the columns in the table should be read - * only. - */ - public void setWALDisabled(final boolean disable) { - setValue(DISABLE_WAL_KEY, disable? TRUE: FALSE); - } - -} diff --git a/hbase-consensus/src/main/java/org/apache/hadoop/hbase/KeyValue.java b/hbase-consensus/src/main/java/org/apache/hadoop/hbase/KeyValue.java deleted file mode 100644 index 5a55683..0000000 --- a/hbase-consensus/src/main/java/org/apache/hadoop/hbase/KeyValue.java +++ /dev/null @@ -1,2299 +0,0 @@ -/** - * Copyright 2009 The Apache Software Foundation - * - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hbase; - -import java.io.DataInput; -import java.io.DataOutput; -import java.io.IOException; -import java.nio.ByteBuffer; -import java.util.Comparator; -import java.util.HashMap; -import java.util.Map; -import java.util.Random; - -import javax.annotation.concurrent.Immutable; - -import com.facebook.swift.codec.ThriftConstructor; -import com.facebook.swift.codec.ThriftField; -import com.facebook.swift.codec.ThriftStruct; -import com.google.common.primitives.Longs; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hbase.io.HeapSize; -import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hadoop.hbase.util.ClassSize; -import org.apache.hadoop.io.RawComparator; -import org.apache.hadoop.io.Writable; - -/** - * An HBase Key/Value. - * - *

If being used client-side, the primary methods to access individual fields - * are {@link #getRow()}, {@link #getFamily()}, {@link #getQualifier()}, - * {@link #getTimestamp()}, and {@link #getValue()}. These methods allocate new - * byte arrays and return copies so they should be avoided server-side. - * - *

Instances of this class are immutable. They are not - * comparable but Comparators are provided. Comparators change with context, - * whether a user table or a catalog table comparison context. It's - * important that you use the appropriate comparator when comparing rows in - * particular. There are Comparators for KeyValue instances and then for - * just the Key portion of a KeyValue used mostly in {@link HFile}. - * - *

KeyValue wraps a byte array and has offset and length for passed array - * at where to start interpreting the content as a KeyValue blob. The KeyValue - * blob format inside the byte array is: - * <keylength> <valuelength> <key> <value> - * Key is decomposed as: - * <rowlength> <row> <columnfamilylength> <columnfamily> <columnqualifier> <timestamp> <keytype> - * Rowlength maximum is Short.MAX_SIZE, column family length maximum is - * Byte.MAX_SIZE, and column qualifier + key length must be < Integer.MAX_SIZE. - * The column does not contain the family/qualifier delimiter. - * - *
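The fixed-width pieces of that layout can be read back with plain offset arithmetic. The sketch below is an illustration under the stated format only; it is not part of this class and uses java.nio.ByteBuffer (big-endian, like the Bytes utility) instead of the helpers defined here:

import java.nio.ByteBuffer;

final class KeyValueLayoutSketch {
  // Decodes the fixed-width header fields of a serialized KeyValue blob.
  static void describe(byte[] blob, int offset) {
    ByteBuffer buf = ByteBuffer.wrap(blob);
    int keyLength = buf.getInt(offset);                      // <keylength>
    int valueLength = buf.getInt(offset + 4);                // <valuelength>
    int keyOffset = offset + 8;                              // key starts after the two length ints
    short rowLength = buf.getShort(keyOffset);               // <rowlength>
    byte familyLength = buf.get(keyOffset + 2 + rowLength);  // <columnfamilylength>
    long timestamp = buf.getLong(keyOffset + keyLength - 9); // 8-byte timestamp just before the type byte
    byte type = buf.get(keyOffset + keyLength - 1);          // <keytype>
    System.out.printf("key=%d value=%d row=%d family=%d ts=%d type=%d%n",
        keyLength, valueLength, rowLength, familyLength, timestamp, type);
  }
}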

TODO: Group Key-only comparators and operations into a Key class, just - * for neatness sake, if can figure what to call it. - */ - -@Immutable -@ThriftStruct -public final class KeyValue implements Writable, HeapSize, Cloneable { - static final Log LOG = LogFactory.getLog(KeyValue.class); - - /** - * Colon character in UTF-8 - */ - public static final char COLUMN_FAMILY_DELIMITER = ':'; - public static final int DELIMITER = ','; - - public static final byte[] COLUMN_FAMILY_DELIM_ARRAY = - new byte[]{COLUMN_FAMILY_DELIMITER}; - - /** - * Comparator for plain key/values; i.e. non-catalog table key/values. - */ - public static final KVComparator COMPARATOR = new KVComparator(); - - /** - * Comparator for plain key; i.e. non-catalog table key. Works on Key portion - * of KeyValue only. - */ - public static final KeyComparator KEY_COMPARATOR = new KeyComparator(); - - /** - * A {@link KVComparator} for .META. catalog table - * {@link KeyValue}s. - */ - public static final KVComparator META_COMPARATOR = new MetaComparator(); - - /** - * A {@link KVComparator} for .META. catalog table - * {@link KeyValue} keys. - */ - public static final KeyComparator META_KEY_COMPARATOR = new MetaKeyComparator(); - - /** - * A {@link KVComparator} for -ROOT- catalog table - * {@link KeyValue}s. - */ - public static final KVComparator ROOT_COMPARATOR = new RootComparator(); - - /** - * A {@link KVComparator} for -ROOT- catalog table - * {@link KeyValue} keys. - */ - public static final KeyComparator ROOT_KEY_COMPARATOR = new RootKeyComparator(); - - /** - * Get the appropriate row comparator for the specified table. - * - * Hopefully we can get rid of this, I added this here because it's replacing - * something in HSK. We should move completely off of that. - * - * @param tableName The table name. - * @return The comparator. - */ - public static KeyComparator getRowComparator(byte [] tableName) { - if(Bytes.equals(HTableDescriptor.ROOT_TABLEDESC.getName(),tableName)) { - return ROOT_COMPARATOR.getRawComparator(); - } - if(Bytes.equals(HTableDescriptor.META_TABLEDESC.getName(), tableName)) { - return META_COMPARATOR.getRawComparator(); - } - return COMPARATOR.getRawComparator(); - } - - /** Size of the key length field in bytes*/ - public static final int KEY_LENGTH_SIZE = Bytes.SIZEOF_INT; - - /** Size of the key type field in bytes */ - public static final int TYPE_SIZE = Bytes.SIZEOF_BYTE; - - /** Size of the row length field in bytes */ - public static final int ROW_LENGTH_SIZE = Bytes.SIZEOF_SHORT; - - /** Size of the family length field in bytes */ - public static final int FAMILY_LENGTH_SIZE = Bytes.SIZEOF_BYTE; - - /** Size of the timestamp field in bytes */ - public static final int TIMESTAMP_SIZE = Bytes.SIZEOF_LONG; - - // Size of the timestamp and type byte on end of a key -- a long + a byte. - public static final int TIMESTAMP_TYPE_SIZE = TIMESTAMP_SIZE + TYPE_SIZE; - - // Size of the length shorts and bytes in key. - public static final int KEY_INFRASTRUCTURE_SIZE = ROW_LENGTH_SIZE - + FAMILY_LENGTH_SIZE + TIMESTAMP_TYPE_SIZE; - - // How far into the key the row starts at. First thing to read is the short - // that says how long the row is. - public static final int ROW_OFFSET = - Bytes.SIZEOF_INT /*keylength*/ + - Bytes.SIZEOF_INT /*valuelength*/; - - // Size of the length ints in a KeyValue datastructure. - public static final int KEYVALUE_INFRASTRUCTURE_SIZE = ROW_OFFSET; - - /** - * Key type. - * Has space for other key types to be added later. Cannot rely on - * enum ordinals . 
They change if item is removed or moved. Do our own codes. - */ - public static enum Type { - /** - * The minimum type. The latest type in the sorted order out of all - * key-values for the same row/column/timestamp combination. See - * {@link #createLastOnRow} functions. The minimum key type is actually - * greater than all other types, as compared by - * {@link KeyComparator#compare(byte[], int, int, byte[], int, int)}. - */ - Minimum((byte)0), - Put((byte)4), - - Delete((byte)8), - DeleteColumn((byte)12), - DeleteFamily((byte)14), - - /** - * Maximum is used when searching; you look from maximum on down. The - * earliest type in the sorted order for the same row/column/timestamp. See - * {@link #createFirstOnRow} functions. The maximum key type is actually - * smaller than all other types, as compared by - * {@link KeyComparator#compare(byte[], int, int, byte[], int, int)}. - */ - Maximum((byte)255); - - private final byte code; - - Type(final byte c) { - this.code = c; - } - - public byte getCode() { - return this.code; - } - - /** - * Cannot rely on enum ordinals . They change if item is removed or moved. - * Do our own codes. - * @param b - * @return Type associated with passed code. - */ - public static Type codeToType(final byte b) { - for (Type t : Type.values()) { - if (t.getCode() == b) { - return t; - } - } - throw new RuntimeException("Unknown code " + b); - } - } - - /** - * Lowest possible key. - * Makes a Key with highest possible Timestamp, empty row and column. No - * key can be equal or lower than this one in memstore or in store file. - */ - public static final KeyValue LOWESTKEY = - new KeyValue(HConstants.EMPTY_BYTE_ARRAY, HConstants.LATEST_TIMESTAMP); - - private byte [] bytes = null; - private int offset = 0; - private int length = 0; - - /** Here be dragons **/ - - // used to achieve atomic operations in the memstore. - public long getMemstoreTS() { - return memstoreTS; - } - - public void setMemstoreTS(long memstoreTS) { - this.memstoreTS = memstoreTS; - } - - // default value is 0, aka DNC - private long memstoreTS = 0; - - /** Dragon time over, return to normal business */ - - - /** Writable Constructor -- DO NOT USE */ - public KeyValue() {} - - @ThriftConstructor - public KeyValue( - @ThriftField(1) final ByteBuffer buffer) { - this.bytes = buffer.array(); - this.length = buffer.limit() - buffer.position(); - this.offset = buffer.position() + buffer.arrayOffset(); - } - - @ThriftField(1) - public ByteBuffer getByteBuffer() { - return ByteBuffer.wrap(bytes, offset, length); - } - - /** - * Creates a KeyValue from the start of the specified byte array. - * Presumes bytes content is formatted as a KeyValue blob. - * @param bytes byte array - */ - public KeyValue(final byte [] bytes) { - this(bytes, 0); - } - - /** - * Creates a KeyValue from the specified byte array and offset. - * Presumes bytes content starting at offset is - * formatted as a KeyValue blob. - * @param bytes byte array - * @param offset offset to start of KeyValue - */ - public KeyValue(final byte [] bytes, final int offset) { - this(bytes, offset, getLength(bytes, offset)); - } - - /** - * Creates a KeyValue from the specified byte array, starting at offset, and - * for length length. 
- * @param bytes byte array - * @param offset offset to start of the KeyValue - * @param length length of the KeyValue - */ - public KeyValue(final byte [] bytes, final int offset, final int length) { - this.bytes = bytes; - this.offset = offset; - this.length = length; - } - - /** Constructors that build a new backing byte array from fields */ - - /** - * Constructs KeyValue structure filled with null value. - * Sets type to {@link KeyValue.Type#Maximum} - * @param row - row key (arbitrary byte array) - * @param timestamp - */ - public KeyValue(final byte [] row, final long timestamp) { - this(row, timestamp, Type.Maximum); - } - - /** - * Constructs KeyValue structure filled with null value. - * @param row - row key (arbitrary byte array) - * @param timestamp - */ - public KeyValue(final byte [] row, final long timestamp, Type type) { - this(row, null, null, timestamp, type, null); - } - - /** - * Constructs KeyValue structure filled with null value. - * Sets type to {@link KeyValue.Type#Maximum} - * @param row - row key (arbitrary byte array) - * @param family family name - * @param qualifier column qualifier - */ - public KeyValue(final byte [] row, final byte [] family, - final byte [] qualifier) { - this(row, family, qualifier, HConstants.LATEST_TIMESTAMP, Type.Maximum); - } - - /** - * Constructs KeyValue structure filled with null value. - * @param row - row key (arbitrary byte array) - * @param family family name - * @param qualifier column qualifier - */ - public KeyValue(final byte [] row, final byte [] family, - final byte [] qualifier, final byte [] value) { - this(row, family, qualifier, HConstants.LATEST_TIMESTAMP, Type.Put, value); - } - - /** - * Constructs KeyValue structure filled with specified values. - * @param row row key - * @param family family name - * @param qualifier column qualifier - * @param timestamp version timestamp - * @param type key type - * @throws IllegalArgumentException - */ - public KeyValue(final byte[] row, final byte[] family, - final byte[] qualifier, final long timestamp, Type type) { - this(row, family, qualifier, timestamp, type, null); - } - - /** - * Constructs KeyValue structure filled with specified values. - * @param row row key - * @param family family name - * @param qualifier column qualifier - * @param timestamp version timestamp - * @param value column value - * @throws IllegalArgumentException - */ - public KeyValue(final byte[] row, final byte[] family, - final byte[] qualifier, final long timestamp, final byte[] value) { - this(row, family, qualifier, timestamp, Type.Put, value); - } - - /** - * Constructs KeyValue structure filled with specified values. - * @param row row key - * @param family family name - * @param qualifier column qualifier - * @param timestamp version timestamp - * @param type key type - * @param value column value - * @throws IllegalArgumentException - */ - public KeyValue(final byte[] row, final byte[] family, - final byte[] qualifier, final long timestamp, Type type, - final byte[] value) { - this(row, family, qualifier, 0, qualifier==null ? 0 : qualifier.length, - timestamp, type, value, 0, value==null ? 0 : value.length); - } - - /** - * Constructs KeyValue structure filled with specified values. 
- * @param row row key - * @param family family name - * @param qualifier column qualifier - * @param qoffset qualifier offset - * @param qlength qualifier length - * @param timestamp version timestamp - * @param type key type - * @param value column value - * @param voffset value offset - * @param vlength value length - * @throws IllegalArgumentException - */ - public KeyValue(byte [] row, byte [] family, - byte [] qualifier, int qoffset, int qlength, long timestamp, Type type, - byte [] value, int voffset, int vlength) { - this(row, 0, row==null ? 0 : row.length, - family, 0, family==null ? 0 : family.length, - qualifier, qoffset, qlength, timestamp, type, - value, voffset, vlength); - } - - /** - * Constructs KeyValue structure filled with specified values. - *

- * Column is split into two fields, family and qualifier. - * @param row row key - * @param roffset row offset - * @param rlength row length - * @param family family name - * @param foffset family offset - * @param flength family length - * @param qualifier column qualifier - * @param qoffset qualifier offset - * @param qlength qualifier length - * @param timestamp version timestamp - * @param type key type - * @param value column value - * @param voffset value offset - * @param vlength value length - * @throws IllegalArgumentException - */ - public KeyValue(final byte [] row, final int roffset, final int rlength, - final byte [] family, final int foffset, final int flength, - final byte [] qualifier, final int qoffset, final int qlength, - final long timestamp, final Type type, - final byte [] value, final int voffset, final int vlength) { - this.bytes = createByteArray(row, roffset, rlength, - family, foffset, flength, qualifier, qoffset, qlength, - timestamp, type, value, voffset, vlength); - this.length = bytes.length; - this.offset = 0; - } - - /** - * Write KeyValue format into a byte array. - * - * @param row row key - * @param roffset row offset - * @param rlength row length - * @param family family name - * @param foffset family offset - * @param flength family length - * @param qualifier column qualifier - * @param qoffset qualifier offset - * @param qlength qualifier length - * @param timestamp version timestamp - * @param type key type - * @param value column value - * @param voffset value offset - * @param vlength value length - * @return The newly created byte array. - */ - static byte [] createByteArray(final byte [] row, final int roffset, - final int rlength, final byte [] family, final int foffset, int flength, - final byte [] qualifier, final int qoffset, int qlength, - final long timestamp, final Type type, - final byte [] value, final int voffset, int vlength) { - if (rlength > Short.MAX_VALUE) { - throw new IllegalArgumentException("Row > " + Short.MAX_VALUE); - } - if (row == null) { - throw new IllegalArgumentException("Row is null"); - } - // Family length - flength = family == null ? 0 : flength; - if (flength > Byte.MAX_VALUE) { - throw new IllegalArgumentException("Family > " + Byte.MAX_VALUE); - } - // Qualifier length - qlength = qualifier == null ? 0 : qlength; - if (qlength > Integer.MAX_VALUE - rlength - flength) { - throw new IllegalArgumentException("Qualifier > " + Integer.MAX_VALUE); - } - // Key length - long longkeylength = KEY_INFRASTRUCTURE_SIZE + rlength + flength + qlength; - if (longkeylength > Integer.MAX_VALUE) { - throw new IllegalArgumentException("keylength " + longkeylength + " > " + - Integer.MAX_VALUE); - } - int keylength = (int)longkeylength; - // Value length - vlength = value == null? 0 : vlength; - if (vlength > HConstants.MAXIMUM_VALUE_LENGTH) { // FindBugs INT_VACUOUS_COMPARISON - throw new IllegalArgumentException("Valuer > " + - HConstants.MAXIMUM_VALUE_LENGTH); - } - - // Allocate right-sized byte array. - byte [] bytes = new byte[KEYVALUE_INFRASTRUCTURE_SIZE + keylength + vlength]; - // Write key, value and key row length. 
- int pos = 0; - pos = Bytes.putInt(bytes, pos, keylength); - pos = Bytes.putInt(bytes, pos, vlength); - pos = Bytes.putShort(bytes, pos, (short)(rlength & 0x0000ffff)); - pos = Bytes.putBytes(bytes, pos, row, roffset, rlength); - pos = Bytes.putByte(bytes, pos, (byte)(flength & 0x0000ff)); - if(flength != 0) { - pos = Bytes.putBytes(bytes, pos, family, foffset, flength); - } - if(qlength != 0) { - pos = Bytes.putBytes(bytes, pos, qualifier, qoffset, qlength); - } - pos = Bytes.putLong(bytes, pos, timestamp); - pos = Bytes.putByte(bytes, pos, type.getCode()); - if (value != null && value.length > 0) { - pos = Bytes.putBytes(bytes, pos, value, voffset, vlength); - } - return bytes; - } - - /** - * Write KeyValue format into a byte array. - *

- * Takes column in the form family:qualifier - * @param row - row key (arbitrary byte array) - * @param roffset - * @param rlength - * @param column - * @param coffset - * @param clength - * @param timestamp - * @param type - * @param value - * @param voffset - * @param vlength - * @return The newly created byte array. - */ - static byte [] createByteArray(final byte [] row, final int roffset, - final int rlength, - final byte [] column, final int coffset, int clength, - final long timestamp, final Type type, - final byte [] value, final int voffset, int vlength) { - // If column is non-null, figure where the delimiter is at. - int delimiteroffset = 0; - if (column != null && column.length > 0) { - delimiteroffset = getFamilyDelimiterIndex(column, coffset, clength); - if (delimiteroffset > Byte.MAX_VALUE) { - throw new IllegalArgumentException("Family > " + Byte.MAX_VALUE); - } - } else { - return createByteArray(row,roffset,rlength,null,0,0,null,0,0,timestamp, - type,value,voffset,vlength); - } - int flength = delimiteroffset-coffset; - int qlength = clength - flength - 1; - return createByteArray(row, roffset, rlength, column, coffset, - flength, column, delimiteroffset+1, qlength, timestamp, type, - value, voffset, vlength); - } - - /** - * Needed doing 'contains' on List. Only compares the key portion, not the - * value. - */ - public boolean equals(Object other) { - if (!(other instanceof KeyValue)) { - return false; - } - KeyValue kv = (KeyValue)other; - // Comparing bytes should be fine doing equals test. Shouldn't have to - // worry about special .META. comparators doing straight equals. - boolean result = Bytes.BYTES_RAWCOMPARATOR.compare(getBuffer(), - getKeyOffset(), getKeyLength(), - kv.getBuffer(), kv.getKeyOffset(), kv.getKeyLength()) == 0; - return result; - } - - public int hashCode() { - byte[] b = getBuffer(); - int start = getOffset(), end = getOffset() + getLength(); - int h = b[start++]; - for (int i = start; i < end; i++) { - h = (h * 13) ^ b[i]; - } - return h; - } - - //--------------------------------------------------------------------------- - // - // KeyValue cloning - // - //--------------------------------------------------------------------------- - - /** - * Clones a KeyValue. This creates a copy, re-allocating the buffer. - * @return Fully copied clone of this KeyValue - */ - public KeyValue clone() { - byte [] b = new byte[this.length]; - System.arraycopy(this.bytes, this.offset, b, 0, this.length); - KeyValue ret = new KeyValue(b, 0, b.length); - // Important to clone the memstoreTS as well - otherwise memstore's - // update-in-place methods (eg increment) will end up creating - // new entries - ret.setMemstoreTS(memstoreTS); - return ret; - } - - /** - * Creates a shallow copy of this KeyValue, reusing the data byte buffer. - * http://en.wikipedia.org/wiki/Object_copy - * @return Shallow copy of this KeyValue - */ - public KeyValue shallowCopy() { - KeyValue shallowCopy = new KeyValue(this.bytes, this.offset, this.length); - shallowCopy.setMemstoreTS(this.memstoreTS); - return shallowCopy; - } - - //--------------------------------------------------------------------------- - // - // String representation - // - //--------------------------------------------------------------------------- - - public String toString() { - if (this.bytes == null || this.bytes.length == 0) { - return "empty"; - } - return keyToString(this.bytes, this.offset + ROW_OFFSET, getKeyLength()) + - "/vlen=" + getValueLength(); - } - - /** - * @param k Key portion of a KeyValue. 
- * @return Key as a String. - */ - public static String keyToString(final byte [] k) { - return keyToString(k, 0, k.length); - } - - /** - * Produces a string map for this key/value pair. Useful for programmatic use - * and manipulation of the data stored in an HLogKey, for example, printing - * as JSON. Values are left out due to their tendency to be large. If needed, - * they can be added manually. - * - * @return the Map containing data from this key - */ - public Map toStringMap() { - Map stringMap = new HashMap(); - stringMap.put("row", Bytes.toStringBinary(getRow())); - stringMap.put("family", Bytes.toStringBinary(getFamily())); - stringMap.put("qualifier", Bytes.toStringBinary(getQualifier())); - stringMap.put("timestamp", getTimestamp()); - stringMap.put("vlen", getValueLength()); - return stringMap; - } - - public static String keyToString(final byte [] b, final int o, final int l) { - if (b == null) return ""; - int rowlength = Bytes.toShort(b, o); - String row = Bytes.toStringBinary(b, o + Bytes.SIZEOF_SHORT, rowlength); - int columnoffset = o + Bytes.SIZEOF_SHORT + 1 + rowlength; - int familylength = b[columnoffset - 1]; - int columnlength = l - ((columnoffset - o) + TIMESTAMP_TYPE_SIZE); - String family = familylength == 0? "": - Bytes.toStringBinary(b, columnoffset, familylength); - String qualifier = columnlength == 0? "": - Bytes.toStringBinary(b, columnoffset + familylength, - columnlength - familylength); - long timestamp = Bytes.toLong(b, o + (l - TIMESTAMP_TYPE_SIZE)); - String timestampStr = humanReadableTimestamp(timestamp); - byte type = b[o + l - 1]; - return row + "/" + family + - (family != null && family.length() > 0? ":" :"") + - qualifier + "/" + timestampStr + "/" + Type.codeToType(type); - } - - public static String humanReadableTimestamp(final long timestamp) { - if (timestamp == HConstants.LATEST_TIMESTAMP) { - return "LATEST_TIMESTAMP"; - } - if (timestamp == HConstants.OLDEST_TIMESTAMP) { - return "OLDEST_TIMESTAMP"; - } - return String.valueOf(timestamp); - } - - //--------------------------------------------------------------------------- - // - // Public Member Accessors - // - //--------------------------------------------------------------------------- - - /** - * @return The byte array backing this KeyValue. - */ - public byte [] getBuffer() { - return this.bytes; - } - - /** - * @return Offset into {@link #getBuffer()} at which this KeyValue starts. - */ - public int getOffset() { - return this.offset; - } - - /** - * @return Length of bytes this KeyValue occupies in {@link #getBuffer()}. - */ - public int getLength() { - return length; - } - - //--------------------------------------------------------------------------- - // - // Length and Offset Calculators - // - //--------------------------------------------------------------------------- - - /** - * Determines the total length of the KeyValue stored in the specified - * byte array and offset. Includes all headers. - * @param bytes byte array - * @param offset offset to start of the KeyValue - * @return length of entire KeyValue, in bytes - */ - private static int getLength(byte [] bytes, int offset) { - return ROW_OFFSET + - Bytes.toInt(bytes, offset) + - Bytes.toInt(bytes, offset + Bytes.SIZEOF_INT); - } - - /** - * @return Key offset in backing buffer.. - */ - public int getKeyOffset() { - return this.offset + ROW_OFFSET; - } - - public String getKeyString() { - return Bytes.toStringBinary(getBuffer(), getKeyOffset(), getKeyLength()); - } - - /** - * @return Length of key portion. 
- */ - public int getKeyLength() { - return Bytes.toInt(this.bytes, this.offset); - } - - /** - * @return Value offset - */ - public int getValueOffset() { - return getKeyOffset() + getKeyLength(); - } - - /** - * @return Value length - */ - public int getValueLength() { - return Bytes.toInt(this.bytes, this.offset + Bytes.SIZEOF_INT); - } - - /** - * @return Row offset - */ - public int getRowOffset() { - return getKeyOffset() + Bytes.SIZEOF_SHORT; - } - - /** - * @return Row length - */ - public short getRowLength() { - return Bytes.toShort(this.bytes, getKeyOffset()); - } - - /** - * @return Family offset - */ - public int getFamilyOffset() { - return getFamilyOffset(getRowLength()); - } - - /** - * @return Family offset - */ - public int getFamilyOffset(int rlength) { - return this.offset + ROW_OFFSET + Bytes.SIZEOF_SHORT + rlength + Bytes.SIZEOF_BYTE; - } - - /** - * @return Family length - */ - public byte getFamilyLength() { - return getFamilyLength(getFamilyOffset()); - } - - /** - * @return Family length - */ - public byte getFamilyLength(int foffset) { - return this.bytes[foffset-1]; - } - - /** - * @return Qualifier offset - */ - public int getQualifierOffset() { - return getQualifierOffset(getFamilyOffset()); - } - - /** - * @return Qualifier offset - */ - public int getQualifierOffset(int foffset) { - return foffset + getFamilyLength(foffset); - } - - /** - * @return Qualifier length - */ - public int getQualifierLength() { - return getQualifierLength(getRowLength(),getFamilyLength()); - } - - /** - * @return Qualifier length - */ - public int getQualifierLength(int rlength, int flength) { - return getKeyLength() - - (KEY_INFRASTRUCTURE_SIZE + rlength + flength); - } - - /** - * @return Column (family + qualifier) length - */ - public int getTotalColumnLength() { - int rlength = getRowLength(); - int foffset = getFamilyOffset(rlength); - return getTotalColumnLength(rlength,foffset); - } - - /** - * @return Column (family + qualifier) length - */ - public int getTotalColumnLength(int rlength, int foffset) { - int flength = getFamilyLength(foffset); - int qlength = getQualifierLength(rlength,flength); - return flength + qlength; - } - - /** - * @return Timestamp offset - */ - public int getTimestampOffset() { - return getTimestampOffset(getKeyLength()); - } - - /** - * @param keylength Pass if you have it to save on a int creation. - * @return Timestamp offset - */ - public int getTimestampOffset(final int keylength) { - return getKeyOffset() + keylength - TIMESTAMP_TYPE_SIZE; - } - - /** - * @return True if this KeyValue has a LATEST_TIMESTAMP timestamp. - */ - public boolean isLatestTimestamp() { - return Bytes.compareTo(getBuffer(), getTimestampOffset(), Bytes.SIZEOF_LONG, - HConstants.LATEST_TIMESTAMP_BYTES, 0, Bytes.SIZEOF_LONG) == 0; - } - - /** - * @param now Time to set into this IFF timestamp == - * {@link HConstants#LATEST_TIMESTAMP} (else, its a noop). - * @return True is we modified this. - */ - public boolean updateLatestStamp(final byte [] now) { - if (this.isLatestTimestamp()) { - int tsOffset = getTimestampOffset(); - System.arraycopy(now, 0, this.bytes, tsOffset, Bytes.SIZEOF_LONG); - return true; - } - return false; - } - - //--------------------------------------------------------------------------- - // - // Methods that return copies of fields - // - //--------------------------------------------------------------------------- - - /** - * Do not use unless you have to. Used internally for compacting and testing. 
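[Illustrative sketch, not part of this patch.] For orientation while reading the offset and length accessors above, here is how they fall out of the serialized layout written by createByteArray: <keylen:int><vallen:int><rowlen:short><row><famlen:byte><family><qualifier><timestamp:long><type:byte><value>. The class and method names below are invented for the example; only the Bytes helper already used throughout this module is assumed.

import org.apache.hadoop.hbase.util.Bytes;

public class LayoutSketch {
  // ROW_OFFSET in the deleted class is the two leading ints (key length + value length).
  static final int ROW_OFFSET = 2 * Bytes.SIZEOF_INT;

  static void dumpOffsets(byte[] buf, int offset) {
    int keyLength    = Bytes.toInt(buf, offset);                       // getKeyLength()
    int valueLength  = Bytes.toInt(buf, offset + Bytes.SIZEOF_INT);    // getValueLength()
    int keyOffset    = offset + ROW_OFFSET;                            // getKeyOffset()
    short rowLength  = Bytes.toShort(buf, keyOffset);                  // getRowLength()
    int familyOffset = keyOffset + Bytes.SIZEOF_SHORT + rowLength + Bytes.SIZEOF_BYTE;
    byte familyLength = buf[familyOffset - 1];                         // getFamilyLength(foffset)
    int valueOffset  = keyOffset + keyLength;                          // getValueOffset()
    System.out.println("key@" + keyOffset + " (len " + keyLength + "), row len " + rowLength
        + ", family@" + familyOffset + " (len " + familyLength + ")"
        + ", value@" + valueOffset + " (len " + valueLength + ")");
  }
}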
- * - * Use {@link #getRow()}, {@link #getFamily()}, {@link #getQualifier()}, and - * {@link #getValue()} if accessing a KeyValue client-side. - * @return Copy of the key portion only. - */ - public byte [] getKey() { - int keylength = getKeyLength(); - byte [] key = new byte[keylength]; - System.arraycopy(getBuffer(), getKeyOffset(), key, 0, keylength); - return key; - } - - /** - * Returns value in a new byte array. - * Primarily for use client-side. If server-side, use - * {@link #getBuffer()} with appropriate offsets and lengths instead to - * save on allocations. - * @return Value in a new byte array. - */ - public byte [] getValue() { - int o = getValueOffset(); - int l = getValueLength(); - byte [] result = new byte[l]; - System.arraycopy(getBuffer(), o, result, 0, l); - return result; - } - - /** - * Primarily for use client-side. Returns the row of this KeyValue in a new - * byte array.

- * - * If server-side, use {@link #getBuffer()} with appropriate offsets and - * lengths instead. - * @return Row in a new byte array. - */ - public byte [] getRow() { - int o = getRowOffset(); - short l = getRowLength(); - byte [] result = new byte[l]; - System.arraycopy(getBuffer(), o, result, 0, l); - return result; - } - - /** - * - * @return Timestamp - */ - public long getTimestamp() { - return getTimestamp(getKeyLength()); - } - - /** - * @param keylength Pass if you have it to save on a int creation. - * @return Timestamp - */ - long getTimestamp(final int keylength) { - int tsOffset = getTimestampOffset(keylength); - return Bytes.toLong(this.bytes, tsOffset); - } - - /** - * @return Type of this KeyValue. - */ - public byte getType() { - return getType(getKeyLength()); - } - - /** - * @param keylength Pass if you have it to save on a int creation. - * @return Type of this KeyValue. - */ - byte getType(final int keylength) { - return this.bytes[this.offset + keylength - 1 + ROW_OFFSET]; - } - - /** - * @return True if a delete type, a {@link KeyValue.Type#Delete} or - * a {KeyValue.Type#DeleteFamily} or a {@link KeyValue.Type#DeleteColumn} - * KeyValue type. - */ - public boolean isDelete() { - int t = getType(); - return Type.Delete.getCode() <= t && t <= Type.DeleteFamily.getCode(); - } - - /** - * @return return True if Put type. - */ - public boolean isPut() { - int t = getType(); - return (t == Type.Put.getCode()) ? true : false; - } - - /** - * @return True if this KV is a {@link KeyValue.Type#Delete} type. - */ - public boolean isDeleteType() { - return getType() == Type.Delete.getCode(); - } - - /** - * @return True if this KV is a delete family type. - */ - public boolean isDeleteFamily() { - return getType() == Type.DeleteFamily.getCode(); - } - - /** - * @return True if this KV is a delete column type. - */ - public boolean isDeleteColumn() { - return getType() == Type.DeleteColumn.getCode(); - } - - /** - * - * @return True if this KV is a delete family or column type. - */ - public boolean isDeleteColumnOrFamily() { - int t = getType(); - return t == Type.DeleteColumn.getCode() || t == Type.DeleteFamily.getCode(); - } - - /** - * Primarily for use client-side. Returns the family of this KeyValue in a - * new byte array.

- * - * If server-side, use {@link #getBuffer()} with appropriate offsets and - * lengths instead. - * @return Returns family. Makes a copy. - */ - public byte [] getFamily() { - int o = getFamilyOffset(); - int l = getFamilyLength(o); - byte [] result = new byte[l]; - System.arraycopy(this.bytes, o, result, 0, l); - return result; - } - - /** - * Primarily for use client-side. Returns the column qualifier of this - * KeyValue in a new byte array.

- * - * If server-side, use {@link #getBuffer()} with appropriate offsets and - * lengths instead. - * Use {@link #getBuffer()} with appropriate offsets and lengths instead. - * @return Returns qualifier. Makes a copy. - */ - public byte [] getQualifier() { - int o = getQualifierOffset(); - int l = getQualifierLength(); - byte [] result = new byte[l]; - System.arraycopy(this.bytes, o, result, 0, l); - return result; - } - - //--------------------------------------------------------------------------- - // - // KeyValue splitter - // - //--------------------------------------------------------------------------- - - /** - * Utility class that splits a KeyValue buffer into separate byte arrays. - *
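[Illustrative sketch, not part of this patch.] To make the recurring client-side versus server-side note in these javadocs concrete: the copy accessors allocate fresh arrays, while getBuffer() plus the offset/length accessors reads the same bytes in place. The wrapper class and method name are invented for the example.

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.util.Bytes;

public class AccessSketch {
  static String valueAsString(KeyValue kv) {
    // Client-side style: getValue() copies the value into a fresh byte[].
    byte[] copy = kv.getValue();
    // Server-side style: read straight from the backing buffer, no extra allocation for the value.
    String inPlace = Bytes.toString(kv.getBuffer(), kv.getValueOffset(), kv.getValueLength());
    assert inPlace.equals(Bytes.toString(copy));
    return inPlace;
  }
}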

- * Should get rid of this if we can, but is very useful for debugging. - */ - public static class SplitKeyValue { - private byte [][] split; - SplitKeyValue() { - this.split = new byte[6][]; - } - public void setRow(byte [] value) { this.split[0] = value; } - public void setFamily(byte [] value) { this.split[1] = value; } - public void setQualifier(byte [] value) { this.split[2] = value; } - public void setTimestamp(byte [] value) { this.split[3] = value; } - public void setType(byte [] value) { this.split[4] = value; } - public void setValue(byte [] value) { this.split[5] = value; } - public byte [] getRow() { return this.split[0]; } - public byte [] getFamily() { return this.split[1]; } - public byte [] getQualifier() { return this.split[2]; } - public byte [] getTimestamp() { return this.split[3]; } - public byte [] getType() { return this.split[4]; } - public byte [] getValue() { return this.split[5]; } - } - - public void verify() { - int splitOffset = this.offset; - int keyLen = Bytes.toInt(bytes, splitOffset); - splitOffset += Bytes.SIZEOF_INT; - int valLen = Bytes.toInt(bytes, splitOffset); - splitOffset += Bytes.SIZEOF_INT; - short rowLen = Bytes.toShort(bytes, splitOffset); - splitOffset += Bytes.SIZEOF_SHORT; - splitOffset += rowLen; - byte famLen = bytes[splitOffset]; - if (!(keyLen >= 0 && valLen >=0 - && keyLen + valLen + KEYVALUE_INFRASTRUCTURE_SIZE == this.length - && this.length + this.offset <= this.bytes.length - && rowLen >=0 && rowLen <= keyLen - && famLen >=0 && famLen <= keyLen - && rowLen + famLen <= keyLen - )) { - String msg = "Malformed key value: " - + ", offset =" + offset - + ", keyLen =" + keyLen - + ", valLen =" + valLen - + ", length =" + length - + ", rowLen =" + rowLen - + ", famLen =" + famLen - + ", bytes[] is " + Bytes.toStringBinary(bytes, offset, length); - LOG.error(msg); - throw new IllegalArgumentException(msg); - }; - } - - public SplitKeyValue split() { - SplitKeyValue split = new SplitKeyValue(); - int splitOffset = this.offset; - int keyLen = Bytes.toInt(bytes, splitOffset); - splitOffset += Bytes.SIZEOF_INT; - int valLen = Bytes.toInt(bytes, splitOffset); - splitOffset += Bytes.SIZEOF_INT; - short rowLen = Bytes.toShort(bytes, splitOffset); - splitOffset += Bytes.SIZEOF_SHORT; - byte [] row = new byte[rowLen]; - System.arraycopy(bytes, splitOffset, row, 0, rowLen); - splitOffset += rowLen; - split.setRow(row); - byte famLen = bytes[splitOffset]; - splitOffset += Bytes.SIZEOF_BYTE; - byte [] family = new byte[famLen]; - System.arraycopy(bytes, splitOffset, family, 0, famLen); - splitOffset += famLen; - split.setFamily(family); - int colLen = keyLen - - (rowLen + famLen + Bytes.SIZEOF_SHORT + Bytes.SIZEOF_BYTE + - Bytes.SIZEOF_LONG + Bytes.SIZEOF_BYTE); - byte [] qualifier = new byte[colLen]; - System.arraycopy(bytes, splitOffset, qualifier, 0, colLen); - splitOffset += colLen; - split.setQualifier(qualifier); - byte [] timestamp = new byte[Bytes.SIZEOF_LONG]; - System.arraycopy(bytes, splitOffset, timestamp, 0, Bytes.SIZEOF_LONG); - splitOffset += Bytes.SIZEOF_LONG; - split.setTimestamp(timestamp); - byte [] type = new byte[1]; - type[0] = bytes[splitOffset]; - splitOffset += Bytes.SIZEOF_BYTE; - split.setType(type); - byte [] value = new byte[valLen]; - System.arraycopy(bytes, splitOffset, value, 0, valLen); - split.setValue(value); - return split; - } - - //--------------------------------------------------------------------------- - // - // Compare specified fields against those contained in this KeyValue - // - 
//--------------------------------------------------------------------------- - - /** - * @param family - * @return True if matching families. - */ - public boolean matchingFamily(final byte [] family) { - if (this.length == 0 || this.bytes.length == 0) { - return false; - } - int o = getFamilyOffset(); - int l = getFamilyLength(o); - return Bytes.compareTo(family, 0, family.length, this.bytes, o, l) == 0; - } - - /** - * @param qualifier - * @return True if matching qualifiers. - */ - public boolean matchingQualifier(final byte [] qualifier) { - int o = getQualifierOffset(); - int l = getQualifierLength(); - return Bytes.compareTo(qualifier, 0, qualifier.length, - this.bytes, o, l) == 0; - } - - public boolean matchingRow(final byte [] row) { - return matchingRow(row, 0, row.length); - } - - public boolean matchingRow(final byte[] row, int offset, int length) { - return Bytes.compareTo(row, offset, length, - this.bytes, getRowOffset(), getRowLength()) == 0; - } - - /** - * @param column Column minus its delimiter - * @return True if column matches. - */ - public boolean matchingColumnNoDelimiter(final byte [] column) { - int rl = getRowLength(); - int o = getFamilyOffset(rl); - int fl = getFamilyLength(o); - int l = fl + getQualifierLength(rl,fl); - return Bytes.compareTo(column, 0, column.length, this.bytes, o, l) == 0; - } - - /** - * - * @param family column family - * @param qualifier column qualifier - * @return True if column matches - */ - public boolean matchingColumn(final byte[] family, final byte[] qualifier) { - int rl = getRowLength(); - int o = getFamilyOffset(rl); - int fl = getFamilyLength(o); - int ql = getQualifierLength(rl,fl); - if(Bytes.compareTo(family, 0, family.length, this.bytes, o, family.length) - != 0) { - return false; - } - if(qualifier == null || qualifier.length == 0) { - if(ql == 0) { - return true; - } - return false; - } - return Bytes.compareTo(qualifier, 0, qualifier.length, - this.bytes, o + fl, ql) == 0; - } - - /** - * @param left - * @param loffset - * @param llength - * @param lfamilylength Offset of family delimiter in left column. - * @param right - * @param roffset - * @param rlength - * @param rfamilylength Offset of family delimiter in right column. - * @return The result of the comparison. - */ - static int compareColumns(final byte [] left, final int loffset, - final int llength, final int lfamilylength, - final byte [] right, final int roffset, final int rlength, - final int rfamilylength) { - // Compare family portion first. - int diff = Bytes.compareTo(left, loffset, lfamilylength, - right, roffset, rfamilylength); - if (diff != 0) { - return diff; - } - // Compare qualifier portion - return Bytes.compareTo(left, loffset + lfamilylength, - llength - lfamilylength, - right, roffset + rfamilylength, rlength - rfamilylength); - } - - /** - * @return True if non-null row and column. - */ - public boolean nonNullRowAndColumn() { - return getRowLength() > 0 && !isEmptyColumn(); - } - - /** - * @return True if column is empty. - */ - public boolean isEmptyColumn() { - return getQualifierLength() == 0; - } - - /** - * Converts this KeyValue to only contain the key portion (the value is - * changed to be null). This method does a full copy of the backing byte - * array and does not modify the original byte array of this KeyValue. - *

- * This method is used by {@link KeyOnlyFilter} and is an advanced feature of - * KeyValue, proceed with caution. - * @param lenAsVal replace value with the actual value length (false=empty) - */ - public void convertToKeyOnly(boolean lenAsVal) { - // KV format: - // Rebuild as: <0:4> - int dataLen = lenAsVal? Bytes.SIZEOF_INT : 0; - byte [] newBuffer = new byte[getKeyLength() + ROW_OFFSET + dataLen]; - System.arraycopy(this.bytes, this.offset, newBuffer, 0, - Math.min(newBuffer.length,this.length)); - Bytes.putInt(newBuffer, Bytes.SIZEOF_INT, dataLen); - if (lenAsVal) { - Bytes.putInt(newBuffer, newBuffer.length - dataLen, this.getValueLength()); - } - this.bytes = newBuffer; - this.offset = 0; - this.length = newBuffer.length; - } - - /** - * Splits a column in family:qualifier form into separate byte arrays. - *

- * Not recommend to be used as this is old-style API. - * @param c The column. - * @return The parsed column. - */ - public static byte [][] parseColumn(byte [] c) { - final int index = getDelimiter(c, 0, c.length, COLUMN_FAMILY_DELIMITER); - if (index == -1) { - // If no delimiter, return array of size 1 - return new byte [][] { c }; - } else if(index == c.length - 1) { - // Only a family, return array size 1 - byte [] family = new byte[c.length-1]; - System.arraycopy(c, 0, family, 0, family.length); - return new byte [][] { family }; - } - // Family and column, return array size 2 - final byte [][] result = new byte [2][]; - result[0] = new byte [index]; - System.arraycopy(c, 0, result[0], 0, index); - final int len = c.length - (index + 1); - result[1] = new byte[len]; - System.arraycopy(c, index + 1 /*Skip delimiter*/, result[1], 0, - len); - return result; - } - - /** - * Makes a column in family:qualifier form from separate byte arrays. - *
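[Illustrative sketch, not part of this patch.] A short usage example for the old-style column helpers here, parseColumn above and makeColumn just below; the wrapper class is invented.

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.util.Bytes;

public class ColumnSketch {
  static void roundTrip() {
    byte[][] parts = KeyValue.parseColumn(Bytes.toBytes("info:email"));
    // parts[0] is the family "info", parts[1] is the qualifier "email".
    byte[] rebuilt = KeyValue.makeColumn(parts[0], parts[1]);   // back to "info:email"
    assert rebuilt.length == "info:email".length();

    // A trailing delimiter with no qualifier yields a length-1 array holding just the family.
    byte[][] familyOnly = KeyValue.parseColumn(Bytes.toBytes("info:"));
    assert familyOnly.length == 1;
  }
}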

- * Not recommended for usage as this is old-style API. - * @param family - * @param qualifier - * @return family:qualifier - */ - public static byte [] makeColumn(byte [] family, byte [] qualifier) { - return Bytes.add(family, COLUMN_FAMILY_DELIM_ARRAY, qualifier); - } - - public byte[] makeColumn() { - return Bytes.add(bytes, getFamilyOffset(), getFamilyLength(), - COLUMN_FAMILY_DELIM_ARRAY, 0, COLUMN_FAMILY_DELIM_ARRAY.length, - bytes, getQualifierOffset(), getQualifierLength()); - } - - /** - * @param b - * @return Index of the family-qualifier colon delimiter character in passed - * buffer. - */ - public static int getFamilyDelimiterIndex(final byte [] b, final int offset, - final int length) { - return getRequiredDelimiter(b, offset, length, COLUMN_FAMILY_DELIMITER); - } - - private static int getRequiredDelimiter(final byte [] b, - final int offset, final int length, final int delimiter) { - int index = getDelimiter(b, offset, length, delimiter); - if (index < 0) { - throw new IllegalArgumentException("No '" + (char)delimiter + "' in <" + - Bytes.toString(b) + ">" + ", length=" + length + ", offset=" + offset); - } - return index; - } - - /** - * This function is only used in Meta key comparisons so its error message - * is specific for meta key errors. - */ - static int getRequiredDelimiterInReverse(final byte [] b, - final int offset, final int length, final int delimiter) { - int index = getDelimiterInReverse(b, offset, length, delimiter); - if (index < 0) { - throw new IllegalArgumentException("No '" + ((char) delimiter) + "' in <" + - Bytes.toString(b) + ">" + ", length=" + length + ", offset=" + offset); - } - return index; - } - - /** - * @param b - * @param delimiter - * @return Index of delimiter having started from start of b - * moving rightward. - */ - public static int getDelimiter(final byte [] b, int offset, final int length, - final int delimiter) { - if (b == null) { - throw new NullPointerException(); - } - int result = -1; - for (int i = offset; i < length + offset; i++) { - if (b[i] == delimiter) { - result = i; - break; - } - } - return result; - } - - /** - * Find index of passed delimiter walking from end of buffer backwards. - * @param b - * @param delimiter - * @return Index of delimiter - */ - public static int getDelimiterInReverse(final byte [] b, final int offset, - final int length, final int delimiter) { - if (b == null) { - throw new NullPointerException(); - } - int result = -1; - for (int i = (offset + length) - 1; i >= offset; i--) { - if (b[i] == delimiter) { - result = i; - break; - } - } - return result; - } - - /** - * A {@link KVComparator} for -ROOT- catalog table - * {@link KeyValue}s. - */ - public static class RootComparator extends MetaComparator { - private final KeyComparator rawcomparator = new RootKeyComparator(); - - public KeyComparator getRawComparator() { - return this.rawcomparator; - } - - @Override - protected Object clone() throws CloneNotSupportedException { - return new RootComparator(); - } - } - - /** - * A {@link KVComparator} for .META. catalog table - * {@link KeyValue}s. - */ - public static class MetaComparator extends KVComparator { - private final KeyComparator rawcomparator = new MetaKeyComparator(); - - public KeyComparator getRawComparator() { - return this.rawcomparator; - } - - @Override - protected Object clone() throws CloneNotSupportedException { - return new MetaComparator(); - } - } - - /** - * Compare KeyValues. When we compare KeyValues, we only compare the Key - * portion. 
This means two KeyValues with same Key but different Values are - * considered the same as far as this Comparator is concerned. - * Hosts a {@link KeyComparator}. - */ - public static class KVComparator implements java.util.Comparator { - private final KeyComparator rawcomparator = new KeyComparator(); - - /** - * @return RawComparator that can compare the Key portion of a KeyValue. - * Used in hfile where indices are the Key portion of a KeyValue. - */ - public KeyComparator getRawComparator() { - return this.rawcomparator; - } - - public int compare(final KeyValue left, final KeyValue right) { - int ret = getRawComparator().compare(left.getBuffer(), - left.getOffset() + ROW_OFFSET, left.getKeyLength(), - right.getBuffer(), right.getOffset() + ROW_OFFSET, - right.getKeyLength()); - if (ret != 0) return ret; - // Negate this comparison so later edits show up first - return -Longs.compare(left.getMemstoreTS(), right.getMemstoreTS()); - } - - public int compareTimestamps(final KeyValue left, final KeyValue right) { - return compareTimestamps(left, left.getKeyLength(), right, - right.getKeyLength()); - } - - int compareTimestamps(final KeyValue left, final int lkeylength, - final KeyValue right, final int rkeylength) { - // Compare timestamps - long ltimestamp = left.getTimestamp(lkeylength); - long rtimestamp = right.getTimestamp(rkeylength); - return getRawComparator().compareTimestamps(ltimestamp, rtimestamp); - } - - /** - * @param left - * @param right - * @return Result comparing rows. - */ - public int compareRows(final KeyValue left, final KeyValue right) { - return compareRows(left, left.getRowLength(), right, - right.getRowLength()); - } - - /** - * @param left - * @param lrowlength Length of left row. - * @param right - * @param rrowlength Length of right row. - * @return Result comparing rows. 
- */ - public int compareRows(final KeyValue left, final short lrowlength, - final KeyValue right, final short rrowlength) { - return getRawComparator().compareRows(left.getBuffer(), - left.getRowOffset(), lrowlength, - right.getBuffer(), right.getRowOffset(), rrowlength); - } - - /** - * @param left - * @param row - row key (arbitrary byte array) - * @return RawComparator - */ - public int compareRows(final KeyValue left, final byte [] row) { - return getRawComparator().compareRows(left.getBuffer(), - left.getRowOffset(), left.getRowLength(), row, 0, row.length); - } - - public int compareRows(byte [] left, int loffset, int llength, - byte [] right, int roffset, int rlength) { - return getRawComparator().compareRows(left, loffset, llength, - right, roffset, rlength); - } - - public int compareColumns(final KeyValue left, final byte [] right, - final int roffset, final int rlength, final int rfamilyoffset) { - int offset = left.getFamilyOffset(); - int length = left.getFamilyLength() + left.getQualifierLength(); - return getRawComparator().compareColumns(left.getBuffer(), offset, length, - left.getFamilyLength(offset), - right, roffset, rlength, rfamilyoffset); - } - - int compareColumns(final KeyValue left, final short lrowlength, - final KeyValue right, final short rrowlength) { - int lfoffset = left.getFamilyOffset(lrowlength); - int rfoffset = right.getFamilyOffset(rrowlength); - int lclength = left.getTotalColumnLength(lrowlength,lfoffset); - int rclength = right.getTotalColumnLength(rrowlength, rfoffset); - int lfamilylength = left.getFamilyLength(lfoffset); - int rfamilylength = right.getFamilyLength(rfoffset); - return getRawComparator().compareColumns(left.getBuffer(), lfoffset, - lclength, lfamilylength, - right.getBuffer(), rfoffset, rclength, rfamilylength); - } - - /** - * Compares the row and column of two keyvalues for equality - * @param left - * @param right - * @return True if same row and column. - */ - public boolean matchingRowColumn(final KeyValue left, - final KeyValue right) { - short lrowlength = left.getRowLength(); - short rrowlength = right.getRowLength(); - // TsOffset = end of column data. just comparing Row+CF length of each - return ((left.getTimestampOffset() - left.getOffset()) == - (right.getTimestampOffset() - right.getOffset())) && - matchingRows(left, lrowlength, right, rrowlength) && - compareColumns(left, lrowlength, right, rrowlength) == 0; - } - - /** - * @param left - * @param right - * @return True if rows match. - */ - public boolean matchingRows(final KeyValue left, final byte [] right) { - return compareRows(left, right) == 0; - } - - /** - * Compares the row of two keyvalues for equality - * @param left - * @param right - * @return True if rows match. - */ - public boolean matchingRows(final KeyValue left, final KeyValue right) { - short lrowlength = left.getRowLength(); - short rrowlength = right.getRowLength(); - return matchingRows(left, lrowlength, right, rrowlength); - } - - /** - * @param left - * @param lrowlength - * @param right - * @param rrowlength - * @return True if rows match. 
- */ - public boolean matchingRows(final KeyValue left, final short lrowlength, - final KeyValue right, final short rrowlength) { - return lrowlength == rrowlength && - compareRows(left, lrowlength, right, rrowlength) == 0; - } - - public boolean matchingRows(final byte [] left, final int loffset, - final int llength, - final byte [] right, final int roffset, final int rlength) { - int compare = compareRows(left, loffset, llength, - right, roffset, rlength); - if (compare != 0) { - return false; - } - return true; - } - - /** - * Compares the row and timestamp of two keys - * Was called matchesWithoutColumn in HStoreKey. - * @param right Key to compare against. - * @return True if same row and timestamp is greater than the timestamp in - * right - */ - public boolean matchingRowsGreaterTimestamp(final KeyValue left, - final KeyValue right) { - short lrowlength = left.getRowLength(); - short rrowlength = right.getRowLength(); - if (!matchingRows(left, lrowlength, right, rrowlength)) { - return false; - } - return left.getTimestamp() >= right.getTimestamp(); - } - - @Override - protected Object clone() throws CloneNotSupportedException { - return new KVComparator(); - } - - /** - * @return Comparator that ignores timestamps; useful counting versions. - */ - public KVComparator getComparatorIgnoringTimestamps() { - KVComparator c = null; - try { - c = (KVComparator)this.clone(); - c.getRawComparator().ignoreTimestamp = true; - } catch (CloneNotSupportedException e) { - LOG.error("Not supported", e); - } - return c; - } - - /** - * @return Comparator that ignores key type; useful checking deletes - */ - public KVComparator getComparatorIgnoringType() { - KVComparator c = null; - try { - c = (KVComparator)this.clone(); - c.getRawComparator().ignoreType = true; - } catch (CloneNotSupportedException e) { - LOG.error("Not supported", e); - } - return c; - } - } - - /** - * Creates a KeyValue that is last on the specified row id. That is, - * every other possible KeyValue for the given row would compareTo() - * less than the result of this call. - * @param row row key - * @return Last possible KeyValue on passed row - */ - public static KeyValue createLastOnRow(final byte[] row) { - return new KeyValue(row, null, null, HConstants.LATEST_TIMESTAMP, Type.Minimum); - } - - /** - * Create a KeyValue that is smaller than all other possible KeyValues - * for the given row. That is any (valid) KeyValue on 'row' would sort - * _after_ the result. - * - * @param row - row key (arbitrary byte array) - * @return First possible KeyValue on passed row - */ - public static KeyValue createFirstOnRow(final byte [] row) { - return createFirstOnRow(row, HConstants.LATEST_TIMESTAMP); - } - - /** - * Creates a KeyValue that is smaller than all other KeyValues that - * are older than the passed timestamp. - * @param row - row key (arbitrary byte array) - * @param ts - timestamp - * @return First possible key on passed row and timestamp. - */ - public static KeyValue createFirstOnRow(final byte [] row, - final long ts) { - return new KeyValue(row, null, null, ts, Type.Maximum); - } - - /** - * @param row - row key (arbitrary byte array) - * @param c column - {@link #parseColumn(byte[])} is called to split - * the column. 
- * @param ts - timestamp - * @return First possible key on passed row, column and timestamp - * @deprecated - */ - public static KeyValue createFirstOnRow(final byte [] row, final byte [] c, - final long ts) { - byte [][] split = parseColumn(c); - return new KeyValue(row, split[0], split[1], ts, Type.Maximum); - } - - /** - * Create a KeyValue for the specified row, family and qualifier that would be - * smaller than all other possible KeyValues that have the same row,family,qualifier. - * Used for seeking. - * @param row - row key (arbitrary byte array) - * @param family - family name - * @param qualifier - column qualifier - * @return First possible key on passed row, and column. - */ - public static KeyValue createFirstOnRow(final byte [] row, final byte [] family, - final byte [] qualifier) { - return new KeyValue(row, family, qualifier, HConstants.LATEST_TIMESTAMP, Type.Maximum); - } - - public static KeyValue createFirstDeleteFamilyOnRow(final byte[] row, - final byte[] family) { - return createDeleteFamilyOnRow(row, family, - HConstants.LATEST_TIMESTAMP); - } - - /** - * Create a Delete Family KeyValue for the specified row and family that would - * be smaller than all other possible Delete Family KeyValues that have the - * same row and family. Used for seeking. - * - * @param row - * - row key (arbitrary byte array) - * @param family - * - family name - * @param ts - * - timestamp - * @return the Delete Family possible key on passed row - * and ts. - */ - public static KeyValue createDeleteFamilyOnRow(final byte [] row, - final byte[] family, long ts) { - return new KeyValue(row, family, null, ts, - Type.DeleteFamily); - } - - /** - * @param row - row key (arbitrary byte array) - * @param f - family name - * @param q - column qualifier - * @param ts - timestamp - * @return First possible key on passed row, column and timestamp - */ - public static KeyValue createFirstOnRow(final byte [] row, final byte [] f, - final byte [] q, final long ts) { - return new KeyValue(row, f, q, ts, Type.Maximum); - } - - /** - * Create a KeyValue for the specified row, family and qualifier that would be - * smaller than all other possible KeyValues that have the same row, - * family, qualifier. - * Used for seeking. - * @param row row key - * @param roffset row offset - * @param rlength row length - * @param family family name - * @param foffset family offset - * @param flength family length - * @param qualifier column qualifier - * @param qoffset qualifier offset - * @param qlength qualifier length - * @return First possible key on passed Row, Family, Qualifier. - */ - public static KeyValue createFirstOnRow(final byte [] row, - final int roffset, final int rlength, final byte [] family, - final int foffset, final int flength, final byte [] qualifier, - final int qoffset, final int qlength) { - return new KeyValue(row, roffset, rlength, family, - foffset, flength, qualifier, qoffset, qlength, - HConstants.LATEST_TIMESTAMP, Type.Maximum, null, 0, 0); - } - - /** - * Create a KeyValue for the specified row, family, qualifier, timestamp that - * would be smaller than or equal to all other possible KeyValues that have - * the same row, family, qualifier, timestamp. - * Used for seeking. 
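[Illustrative sketch, not part of this patch.] An example of how these seek-key factories are meant to be used: fake keys that bracket every real cell on a row or row+column. The comparator-based check and the wrapper class are invented for the example.

import org.apache.hadoop.hbase.KeyValue;

public class SeekKeySketch {
  static boolean brackets(KeyValue stored) {
    byte[] row = stored.getRow();
    // createFirstOnRow uses LATEST_TIMESTAMP and the Maximum type because newer timestamps
    // (and higher type codes) sort ahead in this key order; createLastOnRow uses the Minimum
    // type on an empty column, which sorts after everything on the row.
    KeyValue first = KeyValue.createFirstOnRow(row, stored.getFamily(), stored.getQualifier());
    KeyValue last = KeyValue.createLastOnRow(row);
    KeyValue.KVComparator cmp = new KeyValue.KVComparator();
    // 'first' sorts no later than the stored cell at that row/column; 'last' sorts after it.
    return cmp.compare(first, stored) <= 0 && cmp.compare(stored, last) < 0;
  }
}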
- * @param row row key - * @param roffset row offset - * @param rlength row length - * @param family family name - * @param foffset family offset - * @param flength family length - * @param qualifier column qualifier - * @param qoffset qualifier offset - * @param qlength qualifier length - * @param timestamp timestamp - * @return First possible key on passed row, family, qualifier, timestamp - */ - public static KeyValue createFirstOnRow(final byte [] row, - final int roffset, final int rlength, final byte [] family, - final int foffset, final int flength, final byte [] qualifier, - final int qoffset, final int qlength, long timestamp) { - return new KeyValue(row, roffset, rlength, family, - foffset, flength, qualifier, qoffset, qlength, - timestamp, Type.Maximum, null, 0, 0); - } - - /** - * Create a KeyValue for the specified row, family and qualifier that would be - * larger than or equal to all other possible KeyValues that have the same - * row, family, qualifier. - * Used for reseeking. - * @param row row key - * @param roffset row offset - * @param rlength row length - * @param family family name - * @param foffset family offset - * @param flength family length - * @param qualifier column qualifier - * @param qoffset qualifier offset - * @param qlength qualifier length - * @return Last possible key on passed row, family, qualifier. - */ - public static KeyValue createLastOnRow(final byte [] row, - final int roffset, final int rlength, final byte [] family, - final int foffset, final int flength, final byte [] qualifier, - final int qoffset, final int qlength) { - return new KeyValue(row, roffset, rlength, family, - foffset, flength, qualifier, qoffset, qlength, - HConstants.OLDEST_TIMESTAMP, Type.Minimum, null, 0, 0); - } - - /** - * Similar to {@link #createLastOnRow(byte[], int, int, byte[], int, int, - * byte[], int, int)} but takes a {@link KeyValue}. - * - * @param kv the key-value pair to take row and column from - * @return the last key on the row/column of the given key-value pair - */ - public KeyValue createLastOnRowCol() { - return new KeyValue( - bytes, getRowOffset(), getRowLength(), - bytes, getFamilyOffset(), getFamilyLength(), - bytes, getQualifierOffset(), getQualifierLength(), - HConstants.OLDEST_TIMESTAMP, Type.Minimum, null, 0, 0); - } - - /** - * Creates the first KV with the row/family/qualifier of this KV and the - * given timestamp. Uses the "maximum" KV type that guarantees that the new - * KV is the lowest possible for this combination of row, family, qualifier, - * and timestamp. This KV's own timestamp is ignored. While this function - * copies the value from this KV, it is normally used on key-only KVs. - */ - public KeyValue createFirstOnRowColTS(long ts) { - return new KeyValue( - bytes, getRowOffset(), getRowLength(), - bytes, getFamilyOffset(), getFamilyLength(), - bytes, getQualifierOffset(), getQualifierLength(), - ts, Type.Maximum, bytes, getValueOffset(), getValueLength()); - } - - /** - * @param b - * @return A KeyValue made of a byte array that holds the key-only part. - * Needed to convert hfile index members to KeyValues. - */ - public static KeyValue createKeyValueFromKey(final byte [] b) { - return createKeyValueFromKey(b, 0, b.length); - } - - /** - * @param bb - * @return A KeyValue made of a byte buffer that holds the key-only part. - * Needed to convert hfile index members to KeyValues. 
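[Illustrative sketch, not part of this patch.] A small example of the hfile-index use case mentioned above: createKeyValueFromKey rebuilds a value-less KeyValue whose key portion matches the original, so it compares equal under the key-only equals(). The wrapper class is invented.

import org.apache.hadoop.hbase.KeyValue;

public class KeyOnlySketch {
  static KeyValue fromIndexEntry(KeyValue source) {
    // hfile index entries hold only the key portion; rebuild a KeyValue with an empty value from it.
    KeyValue keyOnly = KeyValue.createKeyValueFromKey(source.getKey());
    assert keyOnly.getValueLength() == 0;
    assert keyOnly.equals(source);   // equals() only compares the key portion
    return keyOnly;
  }
}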
- */ - public static KeyValue createKeyValueFromKey(final ByteBuffer bb) { - return createKeyValueFromKey(bb.array(), bb.arrayOffset(), bb.limit()); - } - - /** - * @param b - * @param o - * @param l - * @return A KeyValue made of a byte array that holds the key-only part. - * Needed to convert hfile index members to KeyValues. - */ - public static KeyValue createKeyValueFromKey(final byte [] b, final int o, - final int l) { - byte [] newb = new byte[l + ROW_OFFSET]; - System.arraycopy(b, o, newb, ROW_OFFSET, l); - Bytes.putInt(newb, 0, l); - Bytes.putInt(newb, Bytes.SIZEOF_INT, 0); - return new KeyValue(newb); - } - - /** - * Compare key portion of a {@link KeyValue} for keys in -ROOT- - * table. - */ - public static class RootKeyComparator extends MetaKeyComparator { - public int compareRows(byte [] left, int loffset, int llength, - byte [] right, int roffset, int rlength) { - // Rows look like this: .META.,ROW_FROM_META,RID - // LOG.info("ROOT " + Bytes.toString(left, loffset, llength) + - // "---" + Bytes.toString(right, roffset, rlength)); - final int metalength = 7; // '.META.' length - int lmetaOffsetPlusDelimiter = loffset + metalength; - int leftFarDelimiter = getDelimiterInReverse(left, - lmetaOffsetPlusDelimiter, - llength - metalength, DELIMITER); - int rmetaOffsetPlusDelimiter = roffset + metalength; - int rightFarDelimiter = getDelimiterInReverse(right, - rmetaOffsetPlusDelimiter, rlength - metalength, - DELIMITER); - if (leftFarDelimiter < 0 && rightFarDelimiter >= 0) { - // Nothing between .META. and regionid. Its first key. - return -1; - } else if (rightFarDelimiter < 0 && leftFarDelimiter >= 0) { - return 1; - } else if (leftFarDelimiter < 0 && rightFarDelimiter < 0) { - return 0; - } - int result = super.compareRows(left, lmetaOffsetPlusDelimiter, - leftFarDelimiter - lmetaOffsetPlusDelimiter, - right, rmetaOffsetPlusDelimiter, - rightFarDelimiter - rmetaOffsetPlusDelimiter); - if (result != 0) { - return result; - } - // Compare last part of row, the rowid. - leftFarDelimiter++; - rightFarDelimiter++; - result = compareRowid(left, leftFarDelimiter, - llength - (leftFarDelimiter - loffset), - right, rightFarDelimiter, rlength - (rightFarDelimiter - roffset)); - return result; - } - } - - /** - * Comparator that compares row component only of a KeyValue. - */ - public static class RowComparator implements Comparator { - final KVComparator comparator; - - public RowComparator(final KVComparator c) { - this.comparator = c; - } - - public int compare(KeyValue left, KeyValue right) { - return comparator.compareRows(left, right); - } - } - - /** - * Compare key portion of a {@link KeyValue} for keys in .META. - * table. - */ - public static class MetaKeyComparator extends KeyComparator { - public int compareRows(byte [] left, int loffset, int llength, - byte [] right, int roffset, int rlength) { - // LOG.info("META " + Bytes.toString(left, loffset, llength) + - // "---" + Bytes.toString(right, roffset, rlength)); - int leftDelimiter = getDelimiter(left, loffset, llength, - DELIMITER); - int rightDelimiter = getDelimiter(right, roffset, rlength, - DELIMITER); - if (leftDelimiter < 0 && rightDelimiter >= 0) { - // Nothing between .META. and regionid. Its first key. 
- return -1; - } else if (rightDelimiter < 0 && leftDelimiter >= 0) { - return 1; - } else if (leftDelimiter < 0 && rightDelimiter < 0) { - return 0; - } - // Compare up to the delimiter - int result = Bytes.compareTo(left, loffset, leftDelimiter - loffset, - right, roffset, rightDelimiter - roffset); - if (result != 0) { - return result; - } - // Compare middle bit of the row. - // Move past delimiter - leftDelimiter++; - rightDelimiter++; - int leftFarDelimiter = getRequiredDelimiterInReverse(left, leftDelimiter, - llength - (leftDelimiter - loffset), DELIMITER); - int rightFarDelimiter = getRequiredDelimiterInReverse(right, - rightDelimiter, rlength - (rightDelimiter - roffset), - DELIMITER); - // Now compare middlesection of row. - result = super.compareRows(left, leftDelimiter, - leftFarDelimiter - leftDelimiter, right, rightDelimiter, - rightFarDelimiter - rightDelimiter); - if (result != 0) { - return result; - } - // Compare last part of row, the rowid. - leftFarDelimiter++; - rightFarDelimiter++; - result = compareRowid(left, leftFarDelimiter, - llength - (leftFarDelimiter - loffset), - right, rightFarDelimiter, rlength - (rightFarDelimiter - roffset)); - return result; - } - - protected int compareRowid(byte[] left, int loffset, int llength, - byte[] right, int roffset, int rlength) { - return Bytes.compareTo(left, loffset, llength, right, roffset, rlength); - } - } - - /** - * Avoids redundant comparisons for better performance. - */ - public static interface SamePrefixComparator { - /** - * Compare two keys assuming that the first n bytes are the same. - * @param commonPrefix How many bytes are the same. - */ - public int compareIgnoringPrefix(int commonPrefix, - T left, int loffset, int llength, - T right, int roffset, int rlength); - } - - /** - * Compare key portion of a {@link KeyValue}. - */ - public static class KeyComparator - implements RawComparator, SamePrefixComparator { - volatile boolean ignoreTimestamp = false; - volatile boolean ignoreType = false; - - public int compare(byte[] left, int loffset, int llength, byte[] right, - int roffset, int rlength) { - // Compare row - short lrowlength = Bytes.toShort(left, loffset); - short rrowlength = Bytes.toShort(right, roffset); - int compare = compareRows(left, loffset + Bytes.SIZEOF_SHORT, - lrowlength, right, roffset + Bytes.SIZEOF_SHORT, rrowlength); - if (compare != 0) { - return compare; - } - - // Compare the rest of the two KVs without making any assumptions about - // the common prefix. This function will not compare rows anyway, so we - // don't need to tell it that the common prefix includes the row. - return compareWithoutRow(0, left, loffset, llength, right, roffset, - rlength, rrowlength); - } - - /** - * Compare the two key-values, ignoring the prefix of the given length - * that is known to be the same between the two. 
- * @param commonPrefix the prefix length to ignore - */ - @Override - public int compareIgnoringPrefix(int commonPrefix, byte[] left, - int loffset, int llength, byte[] right, int roffset, int rlength) { - // Compare row - short lrowlength = Bytes.toShort(left, loffset); - short rrowlength; - - int comparisonResult = 0; - if (commonPrefix < ROW_LENGTH_SIZE) { - // almost nothing in common - rrowlength = Bytes.toShort(right, roffset); - comparisonResult = compareRows(left, loffset + ROW_LENGTH_SIZE, - lrowlength, right, roffset + ROW_LENGTH_SIZE, rrowlength); - } else { // the row length is the same - rrowlength = lrowlength; - if (commonPrefix < ROW_LENGTH_SIZE + rrowlength) { - // The rows are not the same. Exclude the common prefix and compare - // the rest of the two rows. - int common = commonPrefix - ROW_LENGTH_SIZE; - comparisonResult = compareRows( - left, loffset + common + ROW_LENGTH_SIZE, lrowlength - common, - right, roffset + common + ROW_LENGTH_SIZE, rrowlength - common); - } - } - if (comparisonResult != 0) { - return comparisonResult; - } - - assert lrowlength == rrowlength; - - return compareWithoutRow(commonPrefix, left, loffset, llength, right, - roffset, rlength, lrowlength); - } - - /** - * Compare column, timestamp, and key type (everything except the row). - * This method is used both in the normal comparator and the "same-prefix" - * comparator. Note that we are assuming that row portions of both KVs have - * already been parsed and found identical, and we don't validate that - * assumption here. - * @param commonPrefix the length of the common prefix of the two - * key-values being compared, including row length and row - */ - private int compareWithoutRow(int commonPrefix, byte[] left, int loffset, - int llength, byte[] right, int roffset, int rlength, short rowlength) { - // Compare column family. Start comparing past row and family length. - int lcolumnoffset = ROW_LENGTH_SIZE + FAMILY_LENGTH_SIZE + - rowlength + loffset; - int rcolumnoffset = ROW_LENGTH_SIZE + FAMILY_LENGTH_SIZE + - rowlength + roffset; - int lcolumnlength = llength - TIMESTAMP_TYPE_SIZE - - (lcolumnoffset - loffset); - int rcolumnlength = rlength - TIMESTAMP_TYPE_SIZE - - (rcolumnoffset - roffset); - - // If row matches, and no column in the 'left' AND put type is 'minimum', - // then return that left is larger than right. - - // This supports 'last key on a row' - the magic is if there is no column - // in the left operand, and the left operand has a type of '0' - magical - // value, then we say the left is bigger. This will let us seek to the - // last key in a row. - - byte ltype = left[loffset + (llength - 1)]; - byte rtype = right[roffset + (rlength - 1)]; - - // If the column is not specified, the "minimum" key type appears the - // latest in the sorted order, regardless of the timestamp. This is used - // for specifying the last key/value in a given row, because there is no - // "lexicographically last column" (it would be infinitely long). The - // "maximum" key type does not need this behavior. - if (lcolumnlength == 0 && ltype == Type.Minimum.getCode()) { - // left is "bigger", i.e. 
it appears later in the sorted order - return 1; - } - if (rcolumnlength == 0 && rtype == Type.Minimum.getCode()) { - return -1; - } - - int common = 0; - if (commonPrefix > 0) { - common = Math.max(0, commonPrefix - - rowlength - ROW_LENGTH_SIZE - FAMILY_LENGTH_SIZE); - common = Math.min(common, Math.min(lcolumnlength, rcolumnlength)); - } - - final int comparisonResult = Bytes.compareTo( - left, lcolumnoffset + common, lcolumnlength - common, - right, rcolumnoffset + common, rcolumnlength - common); - if (comparisonResult != 0) { - return comparisonResult; - } - - return compareTimestampAndType(left, loffset, llength, right, roffset, - rlength, ltype, rtype); - } - - private int compareTimestampAndType(byte[] left, int loffset, int llength, - byte[] right, int roffset, int rlength, byte ltype, byte rtype) { - int compare; - if (!this.ignoreTimestamp) { - // Get timestamps. - long ltimestamp = Bytes.toLong(left, - loffset + (llength - TIMESTAMP_TYPE_SIZE)); - long rtimestamp = Bytes.toLong(right, - roffset + (rlength - TIMESTAMP_TYPE_SIZE)); - compare = compareTimestamps(ltimestamp, rtimestamp); - if (compare != 0) { - return compare; - } - } - - if (!this.ignoreType) { - // Compare types. Let the delete types sort ahead of puts; i.e. types - // of higher numbers sort before those of lesser numbers. Maximum (255) - // appears ahead of everything, and minimum (0) appears after - // everything. - return (0xff & rtype) - (0xff & ltype); - } - return 0; - } - - public int compare(byte[] left, byte[] right) { - return compare(left, 0, left.length, right, 0, right.length); - } - - public int compareRows(byte [] left, int loffset, int llength, - byte [] right, int roffset, int rlength) { - return Bytes.compareTo(left, loffset, llength, right, roffset, rlength); - } - - protected int compareColumns( - byte [] left, int loffset, int llength, final int lfamilylength, - byte [] right, int roffset, int rlength, final int rfamilylength) { - return KeyValue.compareColumns(left, loffset, llength, lfamilylength, - right, roffset, rlength, rfamilylength); - } - - int compareTimestamps(final long ltimestamp, final long rtimestamp) { - // The below older timestamps sorting ahead of newer timestamps looks - // wrong but it is intentional. This way, newer timestamps are first - // found when we iterate over a memstore and newer versions are the - // first we trip over when reading from a store file. - if (ltimestamp < rtimestamp) { - return 1; - } else if (ltimestamp > rtimestamp) { - return -1; - } - return 0; - } - } - - // HeapSize - public long heapSize() { - return ClassSize.align(ClassSize.OBJECT + ClassSize.REFERENCE + - ClassSize.align(ClassSize.ARRAY + length) + - (2 * Bytes.SIZEOF_INT) + - Bytes.SIZEOF_LONG); - } - - // this overload assumes that the length bytes have already been read, - // and it expects the length of the KeyValue to be explicitly passed - // to it. 
- public void readFields(int length, final DataInput in) throws IOException { - this.length = length; - this.offset = 0; - this.bytes = new byte[this.length]; - in.readFully(this.bytes, 0, this.length); - this.verify(); - } - - // Writable - public void readFields(final DataInput in) throws IOException { - int length = in.readInt(); - readFields(length, in); - } - - public void write(final DataOutput out) throws IOException { - this.verify(); - out.writeInt(this.length); - out.write(this.bytes, this.offset, this.length); - } - - /** - * Returns the size of a key/value pair in bytes - * @param keyLength length of the key in bytes - * @param valueLength length of the value in bytes - * @return key/value pair size in bytes - */ - public static int getKVSize(final int keyLength, - final int valueLength) { - return ROW_OFFSET + keyLength + valueLength; - } - - /** - * Returns new keyValue which has all properties same as the passed ones - * except the value - * - * @param newValue - * - new value - * @return new KeyValue - */ - public KeyValue modifyValueAndClone(byte[] newValue) { - KeyValue newKV = new KeyValue(bytes, getRowOffset(), getRowLength(), bytes, - getFamilyOffset(), getFamilyLength(), bytes, getQualifierOffset(), - getQualifierLength(), this.getTimestamp(), Type.codeToType(this - .getType()), newValue, 0, newValue == null ? 0 : newValue.length); - newKV.setMemstoreTS(this.getMemstoreTS()); - return newKV; - } - - public static KeyValue generateKeyValue(String key, int dataSize) { - Random seededRandom = new Random(System.nanoTime()); - byte[] randomBytes = new byte[dataSize]; - seededRandom.nextBytes(randomBytes); - - return new KeyValue(Bytes.toBytes(key), Bytes.toBytes(""), - Bytes.toBytes(""), randomBytes); - } -} diff --git a/hbase-consensus/src/main/java/org/apache/hadoop/hbase/consensus/client/QuorumClient.java b/hbase-consensus/src/main/java/org/apache/hadoop/hbase/consensus/client/QuorumClient.java index 0fcca92..f58ee67 100644 --- a/hbase-consensus/src/main/java/org/apache/hadoop/hbase/consensus/client/QuorumClient.java +++ b/hbase-consensus/src/main/java/org/apache/hadoop/hbase/consensus/client/QuorumClient.java @@ -25,12 +25,12 @@ import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HServerAddress; import org.apache.hadoop.hbase.client.NoLeaderForRegionException; import org.apache.hadoop.hbase.consensus.log.LogFileInfo; +import org.apache.hadoop.hbase.consensus.log.QuorumEdit; import org.apache.hadoop.hbase.consensus.quorum.QuorumInfo; import org.apache.hadoop.hbase.consensus.rpc.LogState; import org.apache.hadoop.hbase.consensus.rpc.PeerStatus; import org.apache.hadoop.hbase.consensus.util.RaftUtil; import org.apache.hadoop.hbase.regionserver.DataStoreState; -import org.apache.hadoop.hbase.regionserver.wal.WALEdit; import org.apache.hadoop.hbase.util.Pair; import org.apache.hadoop.hbase.util.Threads; import org.slf4j.Logger; @@ -49,8 +49,8 @@ import java.util.concurrent.ExecutorService; import java.util.concurrent.Future; /** - * QuorumClient is the user facing agent for replicating commits (WALEdits) on - * a per-quorum level. Once set up, the replicateCommits(List) method + * QuorumClient is the user facing agent for replicating commits (Edits) on + * a per-quorum level. Once set up, the replicateCommits(List) method * will transparently handle contacting the leader, sending the edits to be * replicated, handling the retries / errors, etc. 
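[Illustrative sketch, not part of this patch.] For reviewers, this is roughly what a call site looks like against the reworked, Edit-based API. The QuorumClient constructor arguments, the executor sizing, and typing the batch as List<QuorumEdit> are assumptions inferred from the hunks in this diff, not verbatim code from it; QuorumEdit.generateRandomQuorumEdit(Random, int) is the helper the load-test client below now uses.

import java.util.Arrays;
import java.util.List;
import java.util.Random;
import java.util.concurrent.Executors;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.consensus.client.QuorumClient;
import org.apache.hadoop.hbase.consensus.log.QuorumEdit;
import org.apache.hadoop.hbase.consensus.quorum.QuorumInfo;

public class ReplicateCommitsSketch {
  static long replicateBatch(QuorumInfo quorumInfo, Configuration conf) throws Exception {
    QuorumClient client = new QuorumClient(quorumInfo, conf, Executors.newFixedThreadPool(4));
    Random random = new Random();
    List<QuorumEdit> batch = Arrays.asList(
        QuorumEdit.generateRandomQuorumEdit(random, 1024),
        QuorumEdit.generateRandomQuorumEdit(random, 1024));
    // Blocks until the quorum leader has committed the batch and returns the commit index.
    return client.replicateCommits(batch);
  }
}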
*/ @@ -92,7 +92,7 @@ public class QuorumClient { this(RaftUtil.createDummyQuorumInfo(regionId), conf, pool); } - public synchronized long replicateCommits(List txns) + public synchronized long replicateCommits(List txns) throws IOException { int numRetries = 0; long endTime = System.currentTimeMillis() + maxOperationLatencyInMillis; diff --git a/hbase-consensus/src/main/java/org/apache/hadoop/hbase/consensus/client/QuorumLoadTestClient.java b/hbase-consensus/src/main/java/org/apache/hadoop/hbase/consensus/client/QuorumLoadTestClient.java index 748d3bd..2e172c4 100644 --- a/hbase-consensus/src/main/java/org/apache/hadoop/hbase/consensus/client/QuorumLoadTestClient.java +++ b/hbase-consensus/src/main/java/org/apache/hadoop/hbase/consensus/client/QuorumLoadTestClient.java @@ -20,14 +20,6 @@ package org.apache.hadoop.hbase.consensus.client; */ -import java.io.IOException; -import java.text.DecimalFormat; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; - import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.GnuParser; import org.apache.commons.cli.HelpFormatter; @@ -35,12 +27,19 @@ import org.apache.commons.cli.Options; import org.apache.commons.io.FileUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HBaseConfiguration; -import org.apache.hadoop.hbase.KeyValue; -import org.apache.hadoop.hbase.regionserver.wal.WALEdit; +import org.apache.hadoop.hbase.consensus.log.QuorumEdit; import org.apache.hadoop.hbase.util.DaemonThreadFactory; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.IOException; +import java.text.DecimalFormat; +import java.util.ArrayList; +import java.util.List; +import java.util.Random; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; + public class QuorumLoadTestClient { private static final Logger LOG = LoggerFactory.getLogger( QuorumLoadTestClient.class); @@ -61,8 +60,9 @@ public class QuorumLoadTestClient { public static void loadTest() { // generate the test data int testDataIndex = 0; - List testData = new ArrayList<>(); + List testData = new ArrayList<>(); + Random random = new Random(System.currentTimeMillis()); while(true) { for (int batchSize = 1; batchSize <= maxBatchSize; batchSize++) { @@ -70,9 +70,7 @@ public class QuorumLoadTestClient { String payloadSizeName = FileUtils.byteCountToDisplaySize(payloadSize[testDataIndex]); for (int j = 0; j < batchSize; j++) { - KeyValue kv = KeyValue.generateKeyValue( - payloadSizeName,payloadSize[testDataIndex]); - testData.add(new WALEdit(Arrays.asList(kv))); + testData.add(QuorumEdit.generateRandomQuorumEdit(random, payloadSize[testDataIndex])); } long start = System.nanoTime(); diff --git a/hbase-consensus/src/main/java/org/apache/hadoop/hbase/consensus/client/QuorumThriftClientAgent.java b/hbase-consensus/src/main/java/org/apache/hadoop/hbase/consensus/client/QuorumThriftClientAgent.java index 075ed64..ac0961e 100644 --- a/hbase-consensus/src/main/java/org/apache/hadoop/hbase/consensus/client/QuorumThriftClientAgent.java +++ b/hbase-consensus/src/main/java/org/apache/hadoop/hbase/consensus/client/QuorumThriftClientAgent.java @@ -31,10 +31,10 @@ import com.google.common.util.concurrent.SettableFuture; import io.airlift.units.Duration; import org.apache.hadoop.hbase.HServerAddress; import org.apache.hadoop.hbase.consensus.log.LogFileInfo; +import org.apache.hadoop.hbase.consensus.log.QuorumEdit; import 
org.apache.hadoop.hbase.consensus.rpc.PeerStatus; import org.apache.hadoop.hbase.consensus.server.ConsensusService; import org.apache.hadoop.hbase.consensus.util.RaftUtil; -import org.apache.hadoop.hbase.regionserver.wal.WALEdit; import org.apache.thrift.protocol.TCompactProtocol; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -111,14 +111,14 @@ public class QuorumThriftClientAgent { } /** - * Replicates a list of WALEdits, on a given quorum. + * Replicates a list of Edits, on a given quorum. * @param regionId The region where we want to replicate these edits. * @param txns The actual edits * @return The commit index of the committed edits * @throws Exception */ public synchronized long replicateCommit(final String regionId, - final List txns) + final List txns) throws Exception { final ConsensusService localAgent = getConsensusServiceAgent(); diff --git a/hbase-consensus/src/main/java/org/apache/hadoop/hbase/consensus/client/QuorumThriftClientCLI.java b/hbase-consensus/src/main/java/org/apache/hadoop/hbase/consensus/client/QuorumThriftClientCLI.java index e3fa9f3..39eaea2 100644 --- a/hbase-consensus/src/main/java/org/apache/hadoop/hbase/consensus/client/QuorumThriftClientCLI.java +++ b/hbase-consensus/src/main/java/org/apache/hadoop/hbase/consensus/client/QuorumThriftClientCLI.java @@ -26,19 +26,7 @@ import org.apache.commons.cli.HelpFormatter; import org.apache.commons.cli.Option; import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; - -import com.facebook.swift.service.ThriftClient; -import com.facebook.swift.service.ThriftClientConfig; -import com.facebook.swift.service.ThriftClientManager; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hbase.HServerAddress; import org.apache.hadoop.hbase.consensus.rpc.PeerStatus; -import org.apache.hadoop.hbase.consensus.server.ConsensusService; -import org.apache.hadoop.hbase.regionserver.wal.WALEdit; - -import java.io.IOException; -import java.util.List; public class QuorumThriftClientCLI { diff --git a/hbase-consensus/src/main/java/org/apache/hadoop/hbase/consensus/log/Edit.java b/hbase-consensus/src/main/java/org/apache/hadoop/hbase/consensus/log/Edit.java new file mode 100644 index 0000000..4b8596c --- /dev/null +++ b/hbase-consensus/src/main/java/org/apache/hadoop/hbase/consensus/log/Edit.java @@ -0,0 +1,19 @@ +package org.apache.hadoop.hbase.consensus.log; + +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; + +public interface Edit { + // Returns the total size occupied by the serialized version of the Edit + int getSerializedLength(); + + // Serialize the edit to the given DataOutputStream + int write(final DataOutputStream os) throws IOException; + + // Deserialize the Edit from the given DataInputStream + Edit read(final DataInputStream is) throws IOException; + + // Comparator + int compareTo(final Edit e); +} diff --git a/hbase-consensus/src/main/java/org/apache/hadoop/hbase/consensus/log/EditCodec.java b/hbase-consensus/src/main/java/org/apache/hadoop/hbase/consensus/log/EditCodec.java new file mode 100644 index 0000000..ff3c125 --- /dev/null +++ b/hbase-consensus/src/main/java/org/apache/hadoop/hbase/consensus/log/EditCodec.java @@ -0,0 +1,17 @@ +package org.apache.hadoop.hbase.consensus.log; + +import java.io.IOException; +import java.util.List; +import java.nio.ByteBuffer; + +public interface EditCodec { + int getTotalSize(final List edits); + ByteBuffer serializeToByteBuffer(final List edits) throws IOException; + 
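As a minimal sketch of how a caller might drive the new replicateCommits path with instances of the Edit interface defined above (the QuorumEdit class and its generateRandomQuorumEdit helper appear later in this patch; the list type parameter is assumed to be Edit, since type parameters are not visible in this listing, and the wrapper method and variable names are illustrative only):

  // Sketch only: assumes an already-constructed QuorumClient and the signatures shown in this patch.
  static long replicateRandomBatch(QuorumClient quorumClient) throws IOException {
    Random random = new Random();
    List<Edit> batch = new ArrayList<>();
    for (int i = 0; i < 10; i++) {
      batch.add(QuorumEdit.generateRandomQuorumEdit(random, 1024)); // 1 KB of random payload each
    }
    return quorumClient.replicateCommits(batch); // retries internally, returns the commit index
  }
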
ByteBuffer serializeToByteBuffer(final List edits, long timestamp) + throws IOException; + List deserializeFromByteBuffer(final ByteBuffer data) + throws IOException; + + byte getType(); + byte getVersion(); +} diff --git a/hbase-consensus/src/main/java/org/apache/hadoop/hbase/consensus/log/LogFileViewer.java b/hbase-consensus/src/main/java/org/apache/hadoop/hbase/consensus/log/LogFileViewer.java index 258dcfa..794906d 100644 --- a/hbase-consensus/src/main/java/org/apache/hadoop/hbase/consensus/log/LogFileViewer.java +++ b/hbase-consensus/src/main/java/org/apache/hadoop/hbase/consensus/log/LogFileViewer.java @@ -20,8 +20,12 @@ package org.apache.hadoop.hbase.consensus.log; */ -import org.apache.commons.cli.*; -import org.apache.hadoop.hbase.regionserver.wal.WALEdit; +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.CommandLineParser; +import org.apache.commons.cli.HelpFormatter; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; +import org.apache.commons.cli.PosixParser; import org.apache.hadoop.hbase.util.MemoryBuffer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -34,6 +38,9 @@ public class LogFileViewer { private static final Logger LOG = LoggerFactory.getLogger(LogFileViewer.class); + private static String DEFAULT_EDIT_CODEC_CLASSNAME = + "org.apache.hadoop.hbase.consensus.log.QuorumEditCodec"; + /** * @param args * @throws ParseException @@ -47,6 +54,9 @@ public class LogFileViewer { options.addOption("d", "detail", true, "Dump a detailed information about the transactions in the file"); + options.addOption("c", "codec", true, + "The codec class name to use. Default 'org.apache.hadoop.hbase.consensus.log'"); + if (args.length == 0) { printHelp(options); return; @@ -76,7 +86,19 @@ public class LogFileViewer { return ; } - dumpFileInfo(logFile, detailed); + String codecClassName = cmd.getOptionValue("codec", + LogFileViewer.DEFAULT_EDIT_CODEC_CLASSNAME); + + EditCodec codec = null; + try { + Class codecClass = Class.forName(codecClassName); + codec = (EditCodec)codecClass.newInstance(); + } catch (ClassNotFoundException|InstantiationException|IllegalAccessException e) { + e.printStackTrace(); + System.exit(-1); + } + + dumpFileInfo(logFile, detailed, codec); } public static void printHelp(final Options options) { @@ -84,7 +106,8 @@ public class LogFileViewer { formatter.printHelp("LogFileViewer", options, true); } - public static void dumpFileInfo(final File logFile, boolean detailed) + public static void dumpFileInfo(final File logFile, boolean detailed, + final EditCodec codec) throws IOException { long index; LogReader reader = new LogReader(logFile); @@ -105,7 +128,7 @@ public class LogFileViewer { LOG.info("(index:" + index + ", offset:" + reader.getCurrentIndexFileOffset() + ", "); try { - List edits = WALEdit.deserializeFromByteBuffer( + List edits = codec.deserializeFromByteBuffer( buffer.getBuffer()); LOG.info("Size: " + buffer.getBuffer().limit() + ", Number of edits : " + edits.size()); diff --git a/hbase-consensus/src/main/java/org/apache/hadoop/hbase/consensus/log/QuorumEdit.java b/hbase-consensus/src/main/java/org/apache/hadoop/hbase/consensus/log/QuorumEdit.java new file mode 100644 index 0000000..3015ef7 --- /dev/null +++ b/hbase-consensus/src/main/java/org/apache/hadoop/hbase/consensus/log/QuorumEdit.java @@ -0,0 +1,54 @@ +package org.apache.hadoop.hbase.consensus.log; + +import com.facebook.swift.codec.ThriftConstructor; +import com.facebook.swift.codec.ThriftField; +import 
com.facebook.swift.codec.ThriftStruct; +import org.apache.hadoop.hbase.util.Bytes; + +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.util.Random; + +@ThriftStruct +public final class QuorumEdit implements Edit { + + byte[] data; + + @ThriftConstructor + public QuorumEdit(byte[] data) { + this.data = data; + } + + @ThriftField(1) + public byte[] getData() { + return data; + } + + @Override public int getSerializedLength() { + return Bytes.SIZEOF_INT + data.length; + } + + @Override public int write(DataOutputStream os) throws IOException { + os.writeInt(data.length); + os.write(data); + return data.length; + } + + @Override public Edit read(DataInputStream is) throws IOException { + int byteArrayLength = is.readInt(); + data = new byte[byteArrayLength]; + is.read(data); + return this; + } + + @Override public int compareTo(Edit e) { + return Bytes.compareTo(data, ((QuorumEdit)e).data); + } + + public static QuorumEdit generateRandomQuorumEdit(final Random random, int length) { + byte[] randomData = new byte[length]; + random.nextBytes(randomData); + return new QuorumEdit(randomData); + } +} diff --git a/hbase-consensus/src/main/java/org/apache/hadoop/hbase/consensus/log/QuorumEditCodec.java b/hbase-consensus/src/main/java/org/apache/hadoop/hbase/consensus/log/QuorumEditCodec.java new file mode 100644 index 0000000..d652ae0 --- /dev/null +++ b/hbase-consensus/src/main/java/org/apache/hadoop/hbase/consensus/log/QuorumEditCodec.java @@ -0,0 +1,187 @@ +package org.apache.hadoop.hbase.consensus.log; + +import com.google.common.base.Preconditions; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.ipc.ByteBufferOutputStream; +import org.apache.hadoop.hbase.util.Bytes; + +import java.io.ByteArrayInputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +public class QuorumEditCodec implements EditCodec { + + final static byte EDIT_TYPE = HConstants.QUORUM_EDIT_TYPE; + final static byte EDIT_VERSION = HConstants.QUORUM_EDIT_VERSION; + + public enum PayloadHeaderField { + MAGIC(0, Bytes.SIZEOF_BYTE), + TYPE(1, Bytes.SIZEOF_BYTE), + VERSION(2, Bytes.SIZEOF_BYTE), + TIMESTAMP(3, Bytes.SIZEOF_LONG), + NUM_EDITS(11, Bytes.SIZEOF_INT); + + final int offset; + final int length; + + private PayloadHeaderField(final int offset, final int length) { + this.offset = offset; + this.length = length; + } + } + + public final static int PAYLOAD_HEADER_SIZE = + PayloadHeaderField.NUM_EDITS.offset + + PayloadHeaderField.NUM_EDITS.length; + + + @Override public int getTotalSize(List edits) { + int size = 0; + for (Edit e : edits) { + size += e.getSerializedLength(); + } + return size; + } + + @Override public ByteBuffer serializeToByteBuffer(List edits) + throws IOException { + return serializeToByteBuffer(edits, System.currentTimeMillis()); + } + + @Override public ByteBuffer serializeToByteBuffer(List edits, + long timestamp) + throws IOException { + if (edits == null) { + return null; + } + + int totalPayloadSize = getTotalSize(edits); + ByteBufferOutputStream buffer = new ByteBufferOutputStream( + totalPayloadSize); + try (DataOutputStream os = new DataOutputStream(buffer)) { + // Write the magic value + os.write(HConstants.CONSENSUS_PAYLOAD_MAGIC_VALUE); + + // Write the type of Edit + os.write(EDIT_TYPE); + + // Write the version of 
Edit + os.write(EDIT_VERSION); + + // Write the timestamp + os.writeLong(timestamp); + + // Write the number of Edit in the list + os.writeInt(edits.size()); + + // Serialize the edits + for (Edit e : edits) { + e.write(os); + } + + // Flip and return the byte buffer. + return buffer.getByteBuffer(); + } + } + + @Override + public List deserializeFromByteBuffer(ByteBuffer data) + throws IOException { + + if (!isSameType(data)) { + return null; + } + + int firstBytePosition = data.position(); + int bufferLength = data.remaining(); + + // The check above already read the magic value and type fields, so move on + // to the version field. + byte version = data.get(firstBytePosition + + PayloadHeaderField.VERSION.offset); + if (version != QuorumEditCodec.EDIT_VERSION) { + return null; + } + + long timeStamp = data.getLong(firstBytePosition + PayloadHeaderField.TIMESTAMP.offset); + + int numEdits = data.getInt(firstBytePosition + + PayloadHeaderField.NUM_EDITS.offset); + + if (numEdits == 0) { + return Collections.emptyList(); + } + + // Wrap the remainder of the given ByteBuffer with a DataInputStream and + // de-serialize the list of Edits. + // + // If the Edits are compressed, wrap the InputStream by a decompression + // stream and allocate a new buffer to store the uncompressed data. + int cursor = firstBytePosition + PAYLOAD_HEADER_SIZE; + InputStream is = getByteArrayInputStream(data, cursor, + bufferLength - PAYLOAD_HEADER_SIZE); + + try (DataInputStream dis = new DataInputStream(is)) { + List edits = new ArrayList<>(); + for (int i = 0; i < numEdits; ++i) { + QuorumEdit edit = new QuorumEdit(null); + edits.add(edit.read(dis)); + } + return edits; + } + } + + private boolean isSameType(final ByteBuffer data) { + // Read the Magic Value + if (data.get(data.position() + PayloadHeaderField.MAGIC.offset) != + HConstants.CONSENSUS_PAYLOAD_MAGIC_VALUE) { + return false; + } + + // Read the payload type + if (data.get(data.position() + PayloadHeaderField.TYPE.offset) != + EDIT_TYPE) { + return false; + } + return true; + + } + + @Override public byte getType() { + return EDIT_TYPE; + } + + @Override public byte getVersion() { + return EDIT_VERSION; + } + + /** + * Wrap the array backing the given ByteBuffer with a ByteArrayInputStream. + * Since this InputStream works on the underlying array the state of the given + * ByteBuffer is guaranteed to remain unchanged. 
+ * + * @param buffer an array backed {@link ByteBuffer} + * @param position the position in the buffer from where to start the stream + * @param length length of the input stream + * @return an {@link java.io.InputStream} wrapping the underlying array of + * the given {@link ByteBuffer} + */ + private static ByteArrayInputStream getByteArrayInputStream( + final ByteBuffer buffer, final int position, final int length) { + Preconditions.checkArgument(buffer.hasArray(), + "An array backed buffer is required"); + Preconditions.checkArgument(position >= buffer.position(), + "Position can not be behind buffer.position()"); + Preconditions.checkArgument( + position - buffer.position() + length <= buffer.remaining(), + "Length can not be past the remainder of the buffer"); + return new ByteArrayInputStream(buffer.array(), + buffer.arrayOffset() + position, length); + } +} diff --git a/hbase-consensus/src/main/java/org/apache/hadoop/hbase/consensus/quorum/QuorumAgent.java b/hbase-consensus/src/main/java/org/apache/hadoop/hbase/consensus/quorum/QuorumAgent.java index c0fe2cb..4f239b2 100644 --- a/hbase-consensus/src/main/java/org/apache/hadoop/hbase/consensus/quorum/QuorumAgent.java +++ b/hbase-consensus/src/main/java/org/apache/hadoop/hbase/consensus/quorum/QuorumAgent.java @@ -8,12 +8,12 @@ import org.apache.hadoop.hbase.client.NoLeaderForRegionException; import org.apache.hadoop.hbase.conf.ConfigurationObserver; import org.apache.hadoop.hbase.consensus.exceptions.CommitQueueOverloadedException; import org.apache.hadoop.hbase.consensus.exceptions.NewLeaderException; +import org.apache.hadoop.hbase.consensus.log.Edit; +import org.apache.hadoop.hbase.consensus.log.EditCodec; import org.apache.hadoop.hbase.consensus.metrics.ConsensusMetrics; import org.apache.hadoop.hbase.consensus.protocol.ConsensusHost; import org.apache.hadoop.hbase.consensus.raft.events.ReplicateEntriesEvent; -import org.apache.hadoop.hbase.io.hfile.Compression; import org.apache.hadoop.hbase.metrics.TimeStat; -import org.apache.hadoop.hbase.regionserver.wal.WALEdit; import org.apache.hadoop.hbase.util.DaemonThreadFactory; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -37,9 +37,9 @@ import java.util.concurrent.locks.ReentrantLock; * responsible for getting entries committed within a particular time window. * * There is a continuously running 'WAL Syncer' task, which will take a list - * of WALEdits to sync on the quorum. + * of Edits to sync on the quorum. 
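As a rough sketch of the serialization round trip through the QuorumEditCodec added above (the list type parameter is assumed to be Edit, the wrapper method and local names are illustrative, and java.util / java.nio imports are assumed):

  // Sketch only: round-trips a small batch of edits through the codec defined above.
  static void roundTrip() throws IOException {
    Random random = new Random();
    List<Edit> edits = new ArrayList<>();
    for (int i = 0; i < 3; i++) {
      edits.add(QuorumEdit.generateRandomQuorumEdit(random, 128));
    }
    EditCodec codec = new QuorumEditCodec();
    // The header is written first (MAGIC, TYPE, VERSION, TIMESTAMP, NUM_EDITS), then each edit.
    ByteBuffer serialized = codec.serializeToByteBuffer(edits, System.currentTimeMillis());
    List<Edit> decoded = codec.deserializeFromByteBuffer(serialized);
    assert decoded.size() == edits.size();
  }
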
*/ -public class QuorumAgent implements ConfigurationObserver { +public class QuorumAgent implements ConfigurationObserver { private static final Logger LOG = LoggerFactory.getLogger(QuorumAgent.class); private volatile RaftQuorumContext context; @@ -65,14 +65,12 @@ public class QuorumAgent implements ConfigurationObserver { /** The interval between retries */ private volatile long sleepIntervalInMillis; - private final Compression.Algorithm compressionCodec; - // Lock to guarantee the ordering of log entries in WAL private final ReentrantLock appendLock = new ReentrantLock(true); private final Condition groupCommitBuffer = appendLock.newCondition(); - private LinkedList currentList = new LinkedList<>(); - private LinkedList syncList = new LinkedList<>(); + private LinkedList currentList = new LinkedList<>(); + private LinkedList syncList = new LinkedList<>(); private SettableFuture currentResult; private SettableFuture futureResult = SettableFuture.create(); @@ -84,8 +82,11 @@ public class QuorumAgent implements ConfigurationObserver { private Random random = new Random(); + private final EditCodec codec; + public QuorumAgent(RaftQuorumContext context) { this.context = context; + this.codec = context.getCodec(); Configuration conf = context.getConf(); commitQueueEntriesLimit = conf.getLong( @@ -104,10 +105,6 @@ public class QuorumAgent implements ConfigurationObserver { HConstants.QUORUM_CLIENT_SLEEP_INTERVAL_KEY, HConstants.QUORUM_CLIENT_SLEEP_INTERVAL_DEFAULT); - compressionCodec = Compression.getCompressionAlgorithmByName(conf.get( - HConstants.CONSENSUS_TRANSACTION_LOG_COMPRESSION_CODEC_KEY, - HConstants.CONSENSUS_TRANSACTION_LOG_COMPRESSION_CODEC_DEFAULT)); - executor = Executors.newSingleThreadExecutor( new DaemonThreadFactory("Quorum-Syncer-"+ context.getQuorumName() + "-")); submitWALSyncerTask(); @@ -201,7 +198,7 @@ public class QuorumAgent implements ConfigurationObserver { } } - private ListenableFuture internalCommit(List edits) + private ListenableFuture internalCommit(List edits) throws IOException { SettableFuture future = null; @@ -231,18 +228,18 @@ public class QuorumAgent implements ConfigurationObserver { return future; } - private ListenableFuture internalCommit(WALEdit edits) + private ListenableFuture internalCommit(T edits) throws IOException { return internalCommit(Arrays.asList(edits)); } /** * Append to the log synchronously. - * @param edits WALEdit to append. + * @param edits Edit to append. * @return The commit index of the committed edit. * @throws IOException */ - public long syncAppend(WALEdit edits) throws IOException { + public long syncAppend(T edits) throws IOException { checkBeforeCommit(); // increase the write size @@ -269,18 +266,18 @@ public class QuorumAgent implements ConfigurationObserver { * @return Future containing the commit index. * @throws IOException */ - public ListenableFuture asyncAppend(WALEdit edits) throws IOException { + public ListenableFuture asyncAppend(T edits) throws IOException { checkBeforeCommit(); return internalCommit(edits); } /** - * Same as asyncAppend(WALEdit), but for a list of WALEdit. + * Same as asyncAppend(Edit), but for a list of Edit. 
* @param edits - * @return The commit index for the list of WALEdits + * @return The commit index for the list of Edits * @throws IOException */ - public ListenableFuture asyncAppend(List edits) throws IOException { + public ListenableFuture asyncAppend(List edits) throws IOException { checkBeforeCommit(); return internalCommit(edits); } @@ -317,60 +314,55 @@ public class QuorumAgent implements ConfigurationObserver { * a strict deadline of 'commitDeadlineInMillis' on when this method * completes. * - * @param entries The list of the @WALEdit to replicate + * @param entries The list of the @Edit to replicate * @return the commit index of the replicated entries. * @throws IOException if the quorum threw an exception during the replication */ - private long syncCommit(List entries, + private long syncCommit(List entries, final SettableFuture result) throws Exception { ByteBuffer serializedEntries; ConsensusMetrics metrics = getRaftQuorumContext().getConsensusMetrics(); try (TimeStat.BlockTimer latency = - metrics.getLogSerializationLatency().time()) { - serializedEntries = WALEdit.serializeToByteBuffer(entries, - System.currentTimeMillis(), compressionCodec); + metrics.getLogSerializationLatency().time()) { + serializedEntries = codec.serializeToByteBuffer(entries, + System.currentTimeMillis()); } - int appendEntriesSize = WALEdit.getWALEditsSize(entries); + int appendEntriesSize = codec.getTotalSize(entries); metrics.getAppendEntriesSize().add(appendEntriesSize); metrics.getAppendEntriesBatchSize().add(entries.size()); - if (!compressionCodec.equals(Compression.Algorithm.NONE)) { - int compressedSize = serializedEntries.remaining() - - WALEdit.PAYLOAD_HEADER_SIZE; - metrics.getAppendEntriesCompressedSize().add(compressedSize); - } else { - // We don't use any compression, so the compressed size would be the - // same as the original size. - metrics.getAppendEntriesCompressedSize().add(appendEntriesSize); - } - if (!context.isLeader()) { - ConsensusHost leader = context.getLeader(); - throw new NewLeaderException( - leader == null ? "No leader" : leader.getHostId()); - } + // We don't use any compression, so the compressed size would be the + // same as the original size. + metrics.getAppendEntriesCompressedSize().add(appendEntriesSize); - ReplicateEntriesEvent event = new ReplicateEntriesEvent(false, - serializedEntries, result); - if (!context.offerEvent(event)) { - ConsensusHost leader = context.getLeader(); - throw new NewLeaderException( - leader == null ? "No leader" : leader.getHostId()); - } - try { - return result.get(commitDeadlineInMillis, TimeUnit.MILLISECONDS); - } catch (Throwable e) { - if (e instanceof TimeoutException) { - metrics.incAppendEntriesMissedDeadline(); - LOG.warn(String.format( - "%s Failed to commit within the deadline of %dms", context, - commitDeadlineInMillis)); - throw e; - } else { - LOG.error(context + " Quorum commit failed", e); - throw new Exception("Quorum commit failed because " + e); - } + if (!context.isLeader()) { + ConsensusHost leader = context.getLeader(); + throw new NewLeaderException( + leader == null ? "No leader" : leader.getHostId()); + } + + ReplicateEntriesEvent event = new ReplicateEntriesEvent(false, + serializedEntries, result); + if (!context.offerEvent(event)) { + ConsensusHost leader = context.getLeader(); + throw new NewLeaderException( + leader == null ? 
"No leader" : leader.getHostId()); + } + try { + return result.get(commitDeadlineInMillis, TimeUnit.MILLISECONDS); + } catch (Throwable e) { + if (e instanceof TimeoutException) { + metrics.incAppendEntriesMissedDeadline(); + LOG.warn(String.format( + "%s Failed to commit within the deadline of %dms", context, + commitDeadlineInMillis)); + throw e; + } else { + LOG.error(context + " Quorum commit failed", e); + throw new Exception("Quorum commit failed because " + e); } + } } private void submitWALSyncerTask() { @@ -396,7 +388,7 @@ public class QuorumAgent implements ConfigurationObserver { if (!currentList.isEmpty()) { // switch the buffer assert syncList.isEmpty(); - LinkedList tmp = syncList; + LinkedList tmp = syncList; syncList = currentList; currentList = tmp; @@ -451,8 +443,4 @@ public class QuorumAgent implements ConfigurationObserver { public long getLastCommittedIndex() { return context.getLogManager().getLastValidTransactionId().getIndex(); } - - public Compression.Algorithm getCompressionCodec() { - return compressionCodec; - } } diff --git a/hbase-consensus/src/main/java/org/apache/hadoop/hbase/consensus/quorum/QuorumInfo.java b/hbase-consensus/src/main/java/org/apache/hadoop/hbase/consensus/quorum/QuorumInfo.java index 3c10bba..e5ed257 100644 --- a/hbase-consensus/src/main/java/org/apache/hadoop/hbase/consensus/quorum/QuorumInfo.java +++ b/hbase-consensus/src/main/java/org/apache/hadoop/hbase/consensus/quorum/QuorumInfo.java @@ -117,7 +117,7 @@ public class QuorumInfo { // Write the version of Quorum Membership Change payload.put(HConstants.QUORUM_MEMBERSHIP_CHANGE_VERSION); - // Write the total number of WALEdits + // Write the total number of Edits payload.putInt(configs.size()); byte[] quorumName, dcName, currPeerInfo = null; diff --git a/hbase-consensus/src/main/java/org/apache/hadoop/hbase/consensus/quorum/RaftQuorumContext.java b/hbase-consensus/src/main/java/org/apache/hadoop/hbase/consensus/quorum/RaftQuorumContext.java index 2d736d7..6716a06 100644 --- a/hbase-consensus/src/main/java/org/apache/hadoop/hbase/consensus/quorum/RaftQuorumContext.java +++ b/hbase-consensus/src/main/java/org/apache/hadoop/hbase/consensus/quorum/RaftQuorumContext.java @@ -34,6 +34,7 @@ import org.apache.hadoop.hbase.consensus.fsm.FiniteStateMachineServiceImpl; import org.apache.hadoop.hbase.consensus.fsm.State; import org.apache.hadoop.hbase.consensus.fsm.Util; import org.apache.hadoop.hbase.consensus.log.CommitLogManagerInterface; +import org.apache.hadoop.hbase.consensus.log.EditCodec; import org.apache.hadoop.hbase.consensus.log.LogFileInfo; import org.apache.hadoop.hbase.consensus.log.TransactionLogManager; import org.apache.hadoop.hbase.consensus.metrics.ConsensusMetrics; @@ -63,7 +64,6 @@ import org.apache.thrift.protocol.TCompactProtocol; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.util.concurrent.ListenableFuture; import java.io.FileNotFoundException; import java.io.IOException; import java.io.RandomAccessFile; @@ -215,13 +215,16 @@ public class RaftQuorumContext implements ImmutableRaftContext, private ExecutorService execServiceForThriftClients; + private final EditCodec codec; + public RaftQuorumContext(final QuorumInfo info, final Configuration config, final HServerAddress consensusServerAddress, final String metricsMBeanNamePrefix, final AggregateTimer aggregateTimer, final SerialExecutorService serialExecutor, - final ExecutorService execServiceForThriftClients) { + final ExecutorService execServiceForThriftClients, + final EditCodec 
codec) { quorumInfo = info; conf = config; @@ -260,6 +263,7 @@ public class RaftQuorumContext implements ImmutableRaftContext, LOG.debug("RaftQuorumContext for quorum " + getQuorumName() + " initialized with rank: " + getRanking()); this.uncommittedEntries = new HashMap<>(); + this.codec = codec; } public void startStateMachines() { @@ -375,6 +379,10 @@ public class RaftQuorumContext implements ImmutableRaftContext, return regionServerAddress; } + public EditCodec getCodec() { + return codec; + } + /** * Upgrade to Joint Quorum Membership from single Quorum membership. * diff --git a/hbase-consensus/src/main/java/org/apache/hadoop/hbase/consensus/server/ConsensusService.java b/hbase-consensus/src/main/java/org/apache/hadoop/hbase/consensus/server/ConsensusService.java index 8bc0f0a..afcc1f0 100644 --- a/hbase-consensus/src/main/java/org/apache/hadoop/hbase/consensus/server/ConsensusService.java +++ b/hbase-consensus/src/main/java/org/apache/hadoop/hbase/consensus/server/ConsensusService.java @@ -27,6 +27,7 @@ import com.google.common.collect.ImmutableMap; import com.google.common.util.concurrent.ListenableFuture; import org.apache.hadoop.hbase.consensus.client.FetchTask; import org.apache.hadoop.hbase.consensus.log.LogFileInfo; +import org.apache.hadoop.hbase.consensus.log.QuorumEdit; import org.apache.hadoop.hbase.consensus.quorum.RaftQuorumContext; import org.apache.hadoop.hbase.consensus.rpc.AppendRequest; import org.apache.hadoop.hbase.consensus.rpc.AppendResponse; @@ -34,7 +35,6 @@ import org.apache.hadoop.hbase.consensus.rpc.PeerStatus; import org.apache.hadoop.hbase.consensus.rpc.VoteRequest; import org.apache.hadoop.hbase.consensus.rpc.VoteResponse; import org.apache.hadoop.hbase.ipc.thrift.exceptions.ThriftHBaseException; -import org.apache.hadoop.hbase.regionserver.wal.WALEdit; import java.nio.ByteBuffer; import java.util.List; @@ -55,7 +55,7 @@ public interface ConsensusService extends AutoCloseable { @ThriftMethod(value = "replicateCommit", exception = { @ThriftException(type = ThriftHBaseException.class, id = 1) }) - ListenableFuture replicateCommit(String regionId, List txns) + ListenableFuture replicateCommit(String regionId, List txns) throws ThriftHBaseException; @ThriftMethod diff --git a/hbase-consensus/src/main/java/org/apache/hadoop/hbase/consensus/server/ConsensusServiceImpl.java b/hbase-consensus/src/main/java/org/apache/hadoop/hbase/consensus/server/ConsensusServiceImpl.java index 2376662..865aa7a 100644 --- a/hbase-consensus/src/main/java/org/apache/hadoop/hbase/consensus/server/ConsensusServiceImpl.java +++ b/hbase-consensus/src/main/java/org/apache/hadoop/hbase/consensus/server/ConsensusServiceImpl.java @@ -26,6 +26,7 @@ import com.google.common.util.concurrent.ListenableFuture; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.consensus.client.FetchTask; import org.apache.hadoop.hbase.consensus.log.LogFileInfo; +import org.apache.hadoop.hbase.consensus.log.QuorumEdit; import org.apache.hadoop.hbase.consensus.log.RemoteLogFetcher; import org.apache.hadoop.hbase.consensus.protocol.ConsensusHost; import org.apache.hadoop.hbase.consensus.quorum.QuorumAgent; @@ -43,7 +44,6 @@ import org.apache.hadoop.hbase.consensus.rpc.VoteRequest; import org.apache.hadoop.hbase.consensus.rpc.VoteResponse; import org.apache.hadoop.hbase.ipc.thrift.exceptions.ThriftHBaseException; import org.apache.hadoop.hbase.regionserver.DataStoreState; -import org.apache.hadoop.hbase.regionserver.wal.WALEdit; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ 
-123,7 +123,7 @@ public class ConsensusServiceImpl implements ConsensusService { } @Override - public ListenableFuture replicateCommit(String regionId, List txns) throws ThriftHBaseException { + public ListenableFuture replicateCommit(String regionId, List txns) throws ThriftHBaseException { RaftQuorumContext c = getRaftQuorumContext(regionId); if (c != null) { QuorumAgent agent = c.getQuorumAgentInstance(); diff --git a/hbase-consensus/src/main/java/org/apache/hadoop/hbase/consensus/server/LocalConsensusServer.java b/hbase-consensus/src/main/java/org/apache/hadoop/hbase/consensus/server/LocalConsensusServer.java index 8094b4c..06be1a0 100644 --- a/hbase-consensus/src/main/java/org/apache/hadoop/hbase/consensus/server/LocalConsensusServer.java +++ b/hbase-consensus/src/main/java/org/apache/hadoop/hbase/consensus/server/LocalConsensusServer.java @@ -20,18 +20,18 @@ package org.apache.hadoop.hbase.consensus.server; */ +import com.facebook.nifty.codec.DefaultThriftFrameCodecFactory; +import com.facebook.nifty.codec.ThriftFrameCodecFactory; +import com.facebook.nifty.core.NiftyTimer; +import com.facebook.nifty.duplex.TDuplexProtocolFactory; +import com.facebook.swift.codec.ThriftCodecManager; +import com.facebook.swift.service.ThriftEventHandler; +import com.facebook.swift.service.ThriftServer; +import com.facebook.swift.service.ThriftServerConfig; +import com.facebook.swift.service.ThriftServiceProcessor; +import com.google.common.collect.ImmutableMap; +import com.google.inject.Inject; import io.airlift.units.Duration; - -import java.io.IOException; -import java.net.InetAddress; -import java.net.UnknownHostException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.TimeUnit; - import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.GnuParser; import org.apache.commons.cli.HelpFormatter; @@ -41,6 +41,7 @@ import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HServerAddress; import org.apache.hadoop.hbase.consensus.client.QuorumClient; +import org.apache.hadoop.hbase.consensus.log.QuorumEditCodec; import org.apache.hadoop.hbase.consensus.quorum.AggregateTimer; import org.apache.hadoop.hbase.consensus.quorum.QuorumInfo; import org.apache.hadoop.hbase.consensus.quorum.RaftQuorumContext; @@ -58,17 +59,15 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.weakref.jmx.MBeanExporter; -import com.facebook.nifty.codec.DefaultThriftFrameCodecFactory; -import com.facebook.nifty.codec.ThriftFrameCodecFactory; -import com.facebook.nifty.core.NiftyTimer; -import com.facebook.nifty.duplex.TDuplexProtocolFactory; -import com.facebook.swift.codec.ThriftCodecManager; -import com.facebook.swift.service.ThriftEventHandler; -import com.facebook.swift.service.ThriftServer; -import com.facebook.swift.service.ThriftServerConfig; -import com.facebook.swift.service.ThriftServiceProcessor; -import com.google.common.collect.ImmutableMap; -import com.google.inject.Inject; +import java.io.IOException; +import java.net.InetAddress; +import java.net.UnknownHostException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; public class LocalConsensusServer { public static final ImmutableMap @@ -348,7 +347,8 @@ public class 
LocalConsensusServer { (regionId + "."), consensusServer.aggregateTimer, consensusServer.serialExecutorService, - consensusServer.execServiceForThriftClients + consensusServer.execServiceForThriftClients, + new QuorumEditCodec() ); context.getConsensusMetrics().export(mbeanExporter); diff --git a/hbase-consensus/src/main/java/org/apache/hadoop/hbase/consensus/util/RaftUtil.java b/hbase-consensus/src/main/java/org/apache/hadoop/hbase/consensus/util/RaftUtil.java index 776d159..9647dbf 100644 --- a/hbase-consensus/src/main/java/org/apache/hadoop/hbase/consensus/util/RaftUtil.java +++ b/hbase-consensus/src/main/java/org/apache/hadoop/hbase/consensus/util/RaftUtil.java @@ -30,13 +30,11 @@ import com.google.common.base.Joiner; import com.google.common.collect.ImmutableSet; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HServerAddress; -import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.consensus.quorum.AggregateTimer; import org.apache.hadoop.hbase.consensus.quorum.QuorumInfo; import org.apache.hadoop.hbase.consensus.quorum.RepeatingTimer; import org.apache.hadoop.hbase.consensus.quorum.TimeoutEventHandler; import org.apache.hadoop.hbase.consensus.quorum.Timer; -import org.apache.hadoop.hbase.util.Bytes; import org.jboss.netty.channel.socket.nio.NioSocketChannelConfig; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/hbase-consensus/src/main/java/org/apache/hadoop/hbase/regionserver/wal/WALEdit.java b/hbase-consensus/src/main/java/org/apache/hadoop/hbase/regionserver/wal/WALEdit.java deleted file mode 100644 index b1222bf..0000000 --- a/hbase-consensus/src/main/java/org/apache/hadoop/hbase/regionserver/wal/WALEdit.java +++ /dev/null @@ -1,521 +0,0 @@ -/** - * Copyright 2009 The Apache Software Foundation - * - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.hadoop.hbase.regionserver.wal; - -import com.facebook.swift.codec.ThriftConstructor; -import com.facebook.swift.codec.ThriftField; -import com.facebook.swift.codec.ThriftStruct; -import com.google.common.base.Preconditions; -import com.google.common.util.concurrent.SettableFuture; -import org.apache.hadoop.hbase.HConstants; -import org.apache.hadoop.hbase.KeyValue; -import org.apache.hadoop.hbase.io.HeapSize; -import org.apache.hadoop.hbase.io.hfile.Compression; -import org.apache.hadoop.hbase.ipc.ByteBufferOutputStream; -import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hadoop.hbase.util.ClassSize; -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.io.compress.Compressor; -import org.apache.hadoop.io.compress.Decompressor; - -import java.io.*; -import java.nio.ByteBuffer; -import java.util.*; - -/** - * WALEdit: Used in HBase's transaction log (WAL) to represent - * the collection of edits (KeyValue objects) corresponding to a - * single transaction. The class implements "Writable" interface - * for serializing/deserializing a set of KeyValue items. - * - * Previously, if a transaction contains 3 edits to c1, c2, c3 for a row R, - * the HLog would have three log entries as follows: - * - * : - * : - * : - * - * This presents problems because row level atomicity of transactions - * was not guaranteed. If we crash after few of the above appends make - * it, then recovery will restore a partial transaction. - * - * In the new world, all the edits for a given transaction are written - * out as a single record, for example: - * - * : - * - * where, the WALEdit is serialized as: - * <-1, # of edits, , , ... > - * For example: - * <-1, 3, , , > - * - * The -1 marker is just a special way of being backward compatible with - * an old HLog which would have contained a single . - * - * The deserializer for WALEdit backward compatibly detects if the record - * is an old style KeyValue or the new style WALEdit. 
- * - */ - -@ThriftStruct -public final class WALEdit implements Writable, HeapSize { - public enum PayloadHeaderField { - MAGIC(0, Bytes.SIZEOF_BYTE), - TYPE(1, Bytes.SIZEOF_BYTE), - VERSION(2, Bytes.SIZEOF_BYTE), - TIMESTAMP(3, Bytes.SIZEOF_LONG), - COMPRESSION_CODEC(11, Bytes.SIZEOF_BYTE), - UNCOMPRESSED_LENGTH(12, Bytes.SIZEOF_INT), - NUM_WALEDITS(16, Bytes.SIZEOF_INT); - - final int offset; - final int length; - - private PayloadHeaderField(final int offset, final int length) { - this.offset = offset; - this.length = length; - } - } - - public final static int PAYLOAD_HEADER_SIZE = - PayloadHeaderField.NUM_WALEDITS.offset + - PayloadHeaderField.NUM_WALEDITS.length; - - private final int VERSION_2 = -1; - - private final List kvs; - - private NavigableMap scopes; - - private long length = 0; - - private SettableFuture commitFuture; - - @ThriftConstructor - public WALEdit( - @ThriftField(1) final List kvs) { - this.kvs = kvs; - for (KeyValue k : kvs) { - length += k.getLength(); - } - } - - public WALEdit() { - kvs = new ArrayList(); - } - - public SettableFuture getCommitFuture() { - return commitFuture; - } - - public void add(KeyValue kv) { - this.kvs.add(kv); - length += kv.getLength(); - } - - public boolean isEmpty() { - return kvs.isEmpty(); - } - - public long getTotalKeyValueLength() { - return length; - } - - public int size() { - return kvs.size(); - } - - @ThriftField(1) - public List getKeyValues() { - return kvs; - } - - public NavigableMap getScopes() { - return scopes; - } - - public void setScopes (NavigableMap scopes) { - // We currently process the map outside of WALEdit, - // TODO revisit when replication is part of core - this.scopes = scopes; - } - - public void readFields(DataInput in) throws IOException { - kvs.clear(); - if (scopes != null) { - scopes.clear(); - } - int versionOrLength = in.readInt(); - if (versionOrLength == VERSION_2) { - // this is new style HLog entry containing multiple KeyValues. - int numEdits = in.readInt(); - for (int idx = 0; idx < numEdits; idx++) { - KeyValue kv = new KeyValue(); - kv.readFields(in); - this.add(kv); - } - int numFamilies = in.readInt(); - if (numFamilies > 0) { - if (scopes == null) { - scopes = new TreeMap(Bytes.BYTES_COMPARATOR); - } - for (int i = 0; i < numFamilies; i++) { - byte[] fam = Bytes.readByteArray(in); - int scope = in.readInt(); - scopes.put(fam, scope); - } - } - } else { - // this is an old style HLog entry. The int that we just - // read is actually the length of a single KeyValue. 
- KeyValue kv = new KeyValue(); - kv.readFields(versionOrLength, in); - this.add(kv); - } - - } - - public void write(DataOutput out) throws IOException { - out.writeInt(VERSION_2); - out.writeInt(kvs.size()); - // We interleave the two lists for code simplicity - for (KeyValue kv : kvs) { - kv.write(out); - } - if (scopes == null) { - out.writeInt(0); - } else { - out.writeInt(scopes.size()); - for (byte[] key : scopes.keySet()) { - Bytes.writeByteArray(out, key); - out.writeInt(scopes.get(key)); - } - } - } - - public String toString() { - StringBuilder sb = new StringBuilder(); - - sb.append("[#edits: " + kvs.size() + " = <"); - for (KeyValue kv : kvs) { - sb.append(kv.toString()); - sb.append("; "); - } - if (scopes != null) { - sb.append(" scopes: " + scopes.toString()); - } - sb.append(">]"); - return sb.toString(); - } - - @Override - public long heapSize() { - long ret = 0; - for (KeyValue kv : kvs) { - ret += kv.heapSize(); - } - if (scopes != null) { - ret += ClassSize.TREEMAP; - ret += ClassSize.align(scopes.size() * ClassSize.MAP_ENTRY); - } - return ret; - } - - /** - * Serialize the given list of WALEdits to an OutputStream. - * @param edits the WALEdits to be serialized - * @param os the {@link DataOutputStream} to write to - * @throws IOException if the output could not be written to the stream - */ - private static void serializeWALEdits(final List edits, - final DataOutputStream os) throws IOException { - for (final WALEdit e : edits) { - os.writeInt(e.getKeyValues().size()); - for (final KeyValue k : e.getKeyValues()) { - os.writeInt(k.getLength()); - os.write(k.getBuffer(), k.getOffset(), k.getLength()); - } - } - } - - /** - * Serialize the given list of WALEdits edits to a {@link ByteBuffer}, - * optionally compressing the WALEdit data using the given compression codec. - * - * @param edits the list of WALEdits - * @return a {@link ByteBuffer} containing a serialized representation of the - * WALEdits. - * @throws java.io.IOException if the WALEdits could not be serialized - */ - public static ByteBuffer serializeToByteBuffer(final List edits, - long timestamp, Compression.Algorithm codec) throws IOException { - Preconditions.checkNotNull(codec); - if (edits == null) { - return null; - } - - int totalPayloadSize = getTotalPayloadSize(edits); - ByteBufferOutputStream buffer = new ByteBufferOutputStream( - totalPayloadSize); - try (DataOutputStream os = new DataOutputStream(buffer)) { - // Write the magic value - os.write(HConstants.CONSENSUS_PAYLOAD_MAGIC_VALUE); - - // Write that the payload is WALEdit - os.write(HConstants.BATCHED_WALEDIT_TYPE); - - // Write the version of WALEdit - os.write(HConstants.BATCHED_WALEDIT_VERSION); - - // Write the timestamp - os.writeLong(timestamp); - - // Write compression algorithm - os.write((byte) codec.ordinal()); - - // Write uncompressed size of the list of WALEdits - os.writeInt(totalPayloadSize - PAYLOAD_HEADER_SIZE); - - // Write the number of WALEdits in the list - os.writeInt(edits.size()); - } - - // Turn on compression if requested when serializing the list of WALEdits. - boolean compressed = !codec.equals(Compression.Algorithm.NONE); - Compressor compressor = codec.getCompressor(); - try (DataOutputStream os = new DataOutputStream(compressed ? - codec.createCompressionStream(buffer, compressor, - totalPayloadSize - PAYLOAD_HEADER_SIZE) : buffer)) { - serializeWALEdits(edits, os); - } finally { - codec.returnCompressor(compressor); - } - - // Flip and return the byte buffer. 
- return buffer.getByteBuffer(); - } - - public static int getWALEditsSize(final List edits) { - int size = 0; - for (final WALEdit e : edits) { - size += Bytes.SIZEOF_INT + e.getKeyValues().size() * Bytes.SIZEOF_INT - + e.getTotalKeyValueLength(); - } - return size; - } - - public static int getTotalPayloadSize(final List edits) { - return PAYLOAD_HEADER_SIZE + getWALEditsSize(edits); - } - - public static boolean isBatchedWALEdit(final ByteBuffer data) { - // Read the Magic Value - if (data.get(data.position() + PayloadHeaderField.MAGIC.offset) != - HConstants.CONSENSUS_PAYLOAD_MAGIC_VALUE) { - return false; - } - - // Read the payload type - if (data.get(data.position() + PayloadHeaderField.TYPE.offset) != - HConstants.BATCHED_WALEDIT_TYPE) { - return false; - } - return true; - } - - /** - * Get the timestamp of the batched WALEdit. This method assumes the given - * ByteBuffer contains a valid batched WALEdits which can be verified using - * {@link #isBatchedWALEdit}. - */ - public static long getTimestamp(final ByteBuffer data) { - return data.getLong(data.position() + PayloadHeaderField.TIMESTAMP.offset); - } - - /** - * Get the compression codec used to compress the serialized WALEdits - * contained in the given {@link ByteBuffer}. This method assumes the position - * of the buffer to point to the does not change the - * position value of the buffer and assumes the caller has performed a version - * check on the buffer to ensure the - * - * @param data a {@link java.nio.ByteBuffer} containing a serialized list of - * WALEdits - * @return the compression codec or the NONE codec if the WALEdit was written - * with a version which does not support compression - */ - public static Compression.Algorithm getCompressionCodec( - final ByteBuffer data) { - byte codecValue = data.get(data.position() + - PayloadHeaderField.COMPRESSION_CODEC.offset); - Compression.Algorithm[] codecs = Compression.Algorithm.values(); - if (codecValue >= 0 && codecValue < codecs.length) { - return codecs[codecValue]; - } - return Compression.Algorithm.NONE; - } - - /** - * Wrap the array backing the given ByteBuffer with a ByteArrayInputStream. - * Since this InputStream works on the underlying array the state of the given - * ByteBuffer is guaranteed to remain unchanged. - * - * @param buffer an array backed {@link ByteBuffer} - * @param position the position in the buffer from where to start the stream - * @param length length of the input stream - * @return an {@link java.io.InputStream} wrapping the underlying array of - * the given {@link ByteBuffer} - */ - private static ByteArrayInputStream getByteArrayInputStream( - final ByteBuffer buffer, final int position, final int length) { - Preconditions.checkArgument(buffer.hasArray(), - "An array backed buffer is required"); - Preconditions.checkArgument(position >= buffer.position(), - "Position can not be behind buffer.position()"); - Preconditions.checkArgument( - position - buffer.position() + length <= buffer.remaining(), - "Length can not be past the remainder of the buffer"); - return new ByteArrayInputStream(buffer.array(), - buffer.arrayOffset() + position, length); - } - - /** - * Read a list of serialized WALEdits from the given - * {@link DataInputStream}, instantiating them backed by the given - * {@link ByteBuffer}. 
- * - * @param numEdits the number of WALEdits expected in the stream - * @param is the {@link InputStream} containing serialized WALEdits - * @param buffer the {@link ByteBuffer} to be used to back the KVs - * @param offset the offset in the buffer from where to copy the KVs - * @param copyToBuffer copy from the stream to the buffer if true, assume the - * steam data is already in the buffer otherwise - * @return a list of WALEdits - * @throws IOException if an exception occurs while reading from the stream - */ - private static List deserializeWALEdits(final int numEdits, - final DataInputStream is, final ByteBuffer buffer, final int offset, - final boolean copyToBuffer) throws IOException { - List edits = new ArrayList<>(numEdits); - byte[] array = buffer.array(); - int cursor = buffer.arrayOffset() + offset; - - for (int editIdx = 0; editIdx < numEdits; ++editIdx) { - WALEdit edit = new WALEdit(); - int numKVs = is.readInt(); - cursor += Bytes.SIZEOF_INT; - - for (int kvIdx = 0; kvIdx < numKVs; ++kvIdx) { - int kvLen = is.readInt(); - cursor += Bytes.SIZEOF_INT; - - if (copyToBuffer) { - // If the buffer does not contain the data yet (which would be the - // case if it is compressed), copy from the InputStream to the buffer. - is.read(array, cursor, kvLen); - } else { - // Do not copy to the buffer and advance the stream cursor. - is.skipBytes(kvLen); - } - - // Instantiate the KV backed by the ByteBuffer - edit.add(new KeyValue(array, cursor, kvLen)); - // Move the ByteBuffer write cursor - cursor += kvLen; - } - - edits.add(edit); - } - - return edits; - } - - /** - * De-serializes a ByteBuffer to list of WALEdits. If the serialized WALEdits - * are not compressed, the resulting list of KVs will be backed by the array - * backing the ByteBuffer instead of allocating fresh buffers. As a - * consequence of this method assumes the state of the ByteBuffer is never - * modified. - * - * @param data a {@link ByteBuffer} containing serialized WALEdits - * @return a list of WALEdits - * @throws java.io.IOException if the WALEdits could not be deserialized - */ - public static List deserializeFromByteBuffer(final ByteBuffer data) - throws IOException { - if (!isBatchedWALEdit(data)) { - return null; - } - - int firstBytePosition = data.position(); - int bufferLength = data.remaining(); - - // The check above already read the magic value and type fields, so move on - // to the version field. - byte version = data.get(firstBytePosition + - PayloadHeaderField.VERSION.offset); - if (version != HConstants.BATCHED_WALEDIT_VERSION) { - return null; - } - - // Get the compression codec and uncompressed size of the list of WALEdits. - // Use the remainder of the current buffer as a hint. - Compression.Algorithm codec = getCompressionCodec(data); - - int uncompressedEditsLen = data.getInt(firstBytePosition + - PayloadHeaderField.UNCOMPRESSED_LENGTH.offset); - int numEdits = data.getInt(firstBytePosition + - PayloadHeaderField.NUM_WALEDITS.offset); - - if (numEdits == 0) { - return Collections.emptyList(); - } - - // Wrap the remainder of the given ByteBuffer with a DataInputStream and - // de-serialize the list of WALEdits. - // - // If the WALEdits are compressed, wrap the InputStream by a decompression - // stream and allocate a new buffer to store the uncompressed data. 
- int cursor = firstBytePosition + PAYLOAD_HEADER_SIZE; - InputStream is = getByteArrayInputStream(data, cursor, - bufferLength - PAYLOAD_HEADER_SIZE); - ByteBuffer deserializedData = data; - - final boolean compressed = !codec.equals(Compression.Algorithm.NONE); - Decompressor decompressor = codec.getDecompressor(); - try { - if (compressed) { - int compressedEditsLen = bufferLength - PAYLOAD_HEADER_SIZE; - is = codec.createDecompressionStream(is, decompressor, - compressedEditsLen); - // Allocate a new ByteBuffer for the uncompressed data. - deserializedData = ByteBuffer.allocate(uncompressedEditsLen); - cursor = 0; - } - - try (DataInputStream dis = new DataInputStream(is)) { - return deserializeWALEdits(numEdits, dis, deserializedData, cursor, - compressed); - } - } finally { - codec.returnDecompressor(decompressor); - } - } -} diff --git a/hbase-consensus/src/test/java/org/apache/hadoop/hbase/consensus/LocalTestBed.java b/hbase-consensus/src/test/java/org/apache/hadoop/hbase/consensus/LocalTestBed.java index b5513d2..f43b097 100644 --- a/hbase-consensus/src/test/java/org/apache/hadoop/hbase/consensus/LocalTestBed.java +++ b/hbase-consensus/src/test/java/org/apache/hadoop/hbase/consensus/LocalTestBed.java @@ -23,25 +23,33 @@ package org.apache.hadoop.hbase.consensus; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HServerAddress; -import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.consensus.exceptions.LeaderNotReadyException; import org.apache.hadoop.hbase.consensus.exceptions.NewLeaderException; +import org.apache.hadoop.hbase.consensus.log.Edit; import org.apache.hadoop.hbase.consensus.quorum.QuorumAgent; import org.apache.hadoop.hbase.consensus.quorum.QuorumInfo; import org.apache.hadoop.hbase.consensus.quorum.RaftQuorumContext; import org.apache.hadoop.hbase.consensus.server.InstrumentedConsensusServiceImpl; import org.apache.hadoop.hbase.consensus.server.LocalConsensusServer; import org.apache.hadoop.hbase.consensus.server.peer.states.PeerHandleAppendResponse; - import org.apache.hadoop.hbase.consensus.util.RaftUtil; -import org.apache.hadoop.hbase.regionserver.wal.WALEdit; -import org.apache.hadoop.hbase.util.Bytes; import org.junit.Assert; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.*; -import java.util.concurrent.*; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Random; +import java.util.Set; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; @@ -392,7 +400,7 @@ public class LocalTestBed { private EventType type; private long when; private List nodes; - private List edits; + private List edits; private long sleepAmount; private Map> delayMap; @@ -429,7 +437,7 @@ public class LocalTestBed { return delayMap; } - public List edits() { + public List edits() { return edits; } @@ -453,7 +461,7 @@ public class LocalTestBed { return this; } - public TestEvent newCommits(List edits) { + public TestEvent newCommits(List edits) { this.edits = edits; return this; } @@ -464,10 +472,10 @@ public class LocalTestBed { READY, RUNNING, DONE, FAILED } private LocalTestBed 
testbed; - private List edits; + private List edits; private State state = State.READY; - public TestRequest(LocalTestBed testbed, List edits) { + public TestRequest(LocalTestBed testbed, List edits) { this.testbed = testbed; this.edits = edits; } @@ -604,7 +612,8 @@ public class LocalTestBed { } // flip a coin if (now >= nextCommitTime) { - events.add(new TestEvent(TestEvent.EventType.NEW_COMMITS).newCommits(generateTestingWALEdit())); + events.add(new TestEvent(TestEvent.EventType.NEW_COMMITS).newCommits( + RAFT_TEST_UTIL.generateTransaction(1, 100))); nextCommitTime += poissonRandomInterarrivalDelay(prng, config.getDouble("packet-arrival-lambda", 100)); ncommits ++; } @@ -780,11 +789,6 @@ public class LocalTestBed { mockLogs = logs; } - private static List generateTestingWALEdit() { - KeyValue kv = KeyValue.createFirstOnRow(Bytes.toBytes("TestQuorum")); - return Arrays.asList(new WALEdit(Arrays.asList(kv))); - } - public void dumpStates() { RAFT_TEST_UTIL.dumpStates(quorumInfo); LOG.info("Total Commit = " + commitSuccessCount.get()+ " successes and " + commitFailureCount.get() + " failures " @@ -801,7 +805,7 @@ public class LocalTestBed { } } - private boolean doCommit(final WALEdit edit) { + private boolean doCommit(final Edit edit) { if (edit == null) { return true; } @@ -839,13 +843,13 @@ public class LocalTestBed { } } - public boolean testSingleCommit(List edits) { + public boolean testSingleCommit(List edits) { boolean success = true; if (edits.size() == 0) { return true; } - for (final WALEdit edit : edits) { + for (final Edit edit : edits) { if ((success = doCommit(edit)) == false) { break; } diff --git a/hbase-consensus/src/test/java/org/apache/hadoop/hbase/consensus/RaftTestUtil.java b/hbase-consensus/src/test/java/org/apache/hadoop/hbase/consensus/RaftTestUtil.java index fdd3009..4e6a7b7 100644 --- a/hbase-consensus/src/test/java/org/apache/hadoop/hbase/consensus/RaftTestUtil.java +++ b/hbase-consensus/src/test/java/org/apache/hadoop/hbase/consensus/RaftTestUtil.java @@ -22,18 +22,18 @@ package org.apache.hadoop.hbase.consensus; import com.facebook.swift.service.ThriftEventHandler; import com.facebook.swift.service.ThriftServerConfig; -import org.apache.commons.io.FileUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HBaseConfiguration; -import org.apache.hadoop.hbase.HColumnDescriptor; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HServerAddress; -import org.apache.hadoop.hbase.HTableDescriptor; -import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.consensus.client.QuorumClient; import org.apache.hadoop.hbase.consensus.client.QuorumThriftClientAgent; import org.apache.hadoop.hbase.consensus.log.CommitLogManagerInterface; +import org.apache.hadoop.hbase.consensus.log.Edit; +import org.apache.hadoop.hbase.consensus.log.EditCodec; import org.apache.hadoop.hbase.consensus.log.InMemoryLogManager; +import org.apache.hadoop.hbase.consensus.log.QuorumEdit; +import org.apache.hadoop.hbase.consensus.log.QuorumEditCodec; import org.apache.hadoop.hbase.consensus.log.TransactionLogManager; import org.apache.hadoop.hbase.consensus.protocol.EditId; import org.apache.hadoop.hbase.consensus.quorum.AggregateTimer; @@ -44,9 +44,6 @@ import org.apache.hadoop.hbase.consensus.server.ConsensusServiceImpl; import org.apache.hadoop.hbase.consensus.server.InstrumentedConsensusServiceImpl; import org.apache.hadoop.hbase.consensus.server.LocalConsensusServer; import org.apache.hadoop.hbase.consensus.util.RaftUtil; -import 
org.apache.hadoop.hbase.io.hfile.Compression; -import org.apache.hadoop.hbase.regionserver.wal.WALEdit; -import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.DaemonThreadFactory; import org.apache.hadoop.hbase.util.serial.SerialExecutorService; import org.apache.hadoop.net.DNS; @@ -62,6 +59,7 @@ import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Random; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ExecutorService; @@ -91,6 +89,10 @@ public class RaftTestUtil { public static String LOCAL_HOST; + private static QuorumEditCodec codec = new QuorumEditCodec(); + + private static Random random = new Random(System.currentTimeMillis()); + static { try { LOCAL_HOST = DNS.getDefaultHost("default", "default"); @@ -188,9 +190,9 @@ public class RaftTestUtil { try { return raftQuorumContextClass.getConstructor(QuorumInfo.class, Configuration.class, HServerAddress.class, String.class, AggregateTimer.class, SerialExecutorService.class, - ExecutorService.class).newInstance(info, conf, address, + ExecutorService.class, EditCodec.class).newInstance(info, conf, address, mBeansPrefix, server.aggregateTimer, server.serialExecutorService, - server.getExecServiceForThriftClients()); + server.getExecServiceForThriftClients(), codec); } catch (Exception e) { LOG.error("Could not construct a RaftQuorumContext of type: " + raftQuorumContextClass + ", because of: ", e); @@ -393,11 +395,8 @@ public class RaftTestUtil { log.initialize(context); int index = 0; - List kvs = new ArrayList<>(); - kvs.add(KeyValue.LOWESTKEY); - WALEdit edit = new WALEdit(kvs); - List entries = new ArrayList<>(); - entries.add(edit); + List entries = new ArrayList<>(); + entries.add(QuorumEdit.generateRandomQuorumEdit(random, 100)); long prevTerm = HConstants.UNDEFINED_TERM_INDEX; long committedIndex = HConstants.UNDEFINED_TERM_INDEX; @@ -408,8 +407,7 @@ public class RaftTestUtil { committedIndex = index - 1; } log.append(new EditId(term, index), committedIndex, - WALEdit.serializeToByteBuffer(entries, System.currentTimeMillis(), - Compression.Algorithm.NONE)); + codec.serializeToByteBuffer(entries, System.currentTimeMillis())); ++index; prevTerm = term; } @@ -560,11 +558,6 @@ public class RaftTestUtil { Math.max(rank--, 0)); } - HTableDescriptor table = new HTableDescriptor(RaftTestUtil.class.getName()); - byte [] FAMILY = Bytes.toBytes("family"); - HColumnDescriptor hcd = new HColumnDescriptor(FAMILY).setMaxVersions(Integer.MAX_VALUE); - table.addFamily(hcd); - Map> peerMap = new HashMap<>(); peerMap.put(QuorumInfo.LOCAL_DC_KEY, peers); @@ -590,18 +583,10 @@ public class RaftTestUtil { return logs; } - public static List generateTransaction(int size) { - List testData = new ArrayList<>(); - for (int i = 0; i < 10; i++) { - String payloadSizeName = FileUtils.byteCountToDisplaySize(size); - KeyValue kv = KeyValue.generateKeyValue(payloadSizeName, size); - - List kvs = new ArrayList<>(); - for (int j = 0; j < 10; j++) { - kvs.add(kv); - } - - testData.add(new WALEdit(kvs)); + public static List generateTransaction(int numberOfEdits, int sizeOfEachEdit) { + List testData = new ArrayList<>(); + for (int i = 0; i < numberOfEdits; i++) { + testData.add(QuorumEdit.generateRandomQuorumEdit(random, sizeOfEachEdit)); } return testData; } diff --git a/hbase-consensus/src/test/java/org/apache/hadoop/hbase/consensus/ReplicationLoadForUnitTest.java 
b/hbase-consensus/src/test/java/org/apache/hadoop/hbase/consensus/ReplicationLoadForUnitTest.java index d0656b4..f3265d9 100644 --- a/hbase-consensus/src/test/java/org/apache/hadoop/hbase/consensus/ReplicationLoadForUnitTest.java +++ b/hbase-consensus/src/test/java/org/apache/hadoop/hbase/consensus/ReplicationLoadForUnitTest.java @@ -76,7 +76,7 @@ public class ReplicationLoadForUnitTest { try { while (!stop) { try { - client.replicateCommits(RaftTestUtil.generateTransaction(1 * 1024)); + client.replicateCommits(RaftTestUtil.generateTransaction(10, 1 * 1024)); if ((++transactionNums) % progressInterval == 0) { System.out.println("Sent " + transactionNums + " transactions to the quorum"); util.printStatusOfQuorum(quorumInfo); diff --git a/hbase-consensus/src/test/java/org/apache/hadoop/hbase/consensus/TestBasicPeerSeeding.java b/hbase-consensus/src/test/java/org/apache/hadoop/hbase/consensus/TestBasicPeerSeeding.java index c2cc247..a94dfbc 100644 --- a/hbase-consensus/src/test/java/org/apache/hadoop/hbase/consensus/TestBasicPeerSeeding.java +++ b/hbase-consensus/src/test/java/org/apache/hadoop/hbase/consensus/TestBasicPeerSeeding.java @@ -20,18 +20,9 @@ package org.apache.hadoop.hbase.consensus; */ -import static junit.framework.Assert.fail; - -import java.util.Arrays; -import java.util.Collection; -import java.util.List; - import org.apache.hadoop.hbase.HConstants; -import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.consensus.client.QuorumClient; import org.apache.hadoop.hbase.consensus.quorum.QuorumInfo; -import org.apache.hadoop.hbase.regionserver.wal.WALEdit; -import org.apache.hadoop.hbase.util.Bytes; import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -40,6 +31,11 @@ import org.junit.runners.Parameterized; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.util.Collection; +import java.util.List; + +import static junit.framework.Assert.fail; + @RunWith(value = Parameterized.class) public class TestBasicPeerSeeding { private static final Logger LOG = LoggerFactory.getLogger( @@ -103,7 +99,7 @@ public class TestBasicPeerSeeding { try { RAFT_TEST_UTIL.dumpStates(quorumInfo); - client.replicateCommits(Arrays.asList(generateTestingWALEdit())); + client.replicateCommits(RAFT_TEST_UTIL.generateTransaction(10, 100)); RAFT_TEST_UTIL.dumpStates(quorumInfo); // Verify all the logs across the majority are the same RAFT_TEST_UTIL.verifyLogs(quorumInfo, QUORUM_MAJORITY); @@ -112,9 +108,4 @@ public class TestBasicPeerSeeding { fail("Unexpected exception: e"); } } - - private static WALEdit generateTestingWALEdit() { - KeyValue kv = KeyValue.createFirstOnRow(Bytes.toBytes("TestQuorum")); - return new WALEdit(Arrays.asList(kv)); - } } diff --git a/hbase-consensus/src/test/java/org/apache/hadoop/hbase/consensus/TestBasicPeerSlow.java b/hbase-consensus/src/test/java/org/apache/hadoop/hbase/consensus/TestBasicPeerSlow.java index f44b989..bbc73ef 100644 --- a/hbase-consensus/src/test/java/org/apache/hadoop/hbase/consensus/TestBasicPeerSlow.java +++ b/hbase-consensus/src/test/java/org/apache/hadoop/hbase/consensus/TestBasicPeerSlow.java @@ -222,7 +222,7 @@ public class TestBasicPeerSlow { while (!stop) { try { client - .replicateCommits(RaftTestUtil.generateTransaction(1 * 1024)); + .replicateCommits(RaftTestUtil.generateTransaction(10, 1 * 1024)); if ((++transactionNums) % progressInterval == 0) { System.out.println("Sent " + transactionNums + "transactions to the quorum"); diff --git 
a/hbase-consensus/src/test/java/org/apache/hadoop/hbase/consensus/TestBasicQuorumCommit.java b/hbase-consensus/src/test/java/org/apache/hadoop/hbase/consensus/TestBasicQuorumCommit.java index b704f50..7f9d72a 100644 --- a/hbase-consensus/src/test/java/org/apache/hadoop/hbase/consensus/TestBasicQuorumCommit.java +++ b/hbase-consensus/src/test/java/org/apache/hadoop/hbase/consensus/TestBasicQuorumCommit.java @@ -20,21 +20,17 @@ package org.apache.hadoop.hbase.consensus; */ +import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.consensus.client.QuorumClient; import org.apache.hadoop.hbase.consensus.quorum.QuorumInfo; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.apache.hadoop.hbase.HConstants; -import org.apache.hadoop.hbase.KeyValue; -import org.apache.hadoop.hbase.regionserver.wal.WALEdit; -import org.apache.hadoop.hbase.util.Bytes; import org.junit.After; import org.junit.Before; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; -import java.util.Arrays; import java.util.Collection; import java.util.List; @@ -100,7 +96,7 @@ public class TestBasicQuorumCommit { private void testSingleCommit() { try { RAFT_TEST_UTIL.dumpStates(quorumInfo); - client.replicateCommits(Arrays.asList(generateTestingWALEdit())); + client.replicateCommits(RAFT_TEST_UTIL.generateTransaction(10, 100)); RAFT_TEST_UTIL.dumpStates(quorumInfo); // Verify all the logs across the majority are the same RAFT_TEST_UTIL.verifyLogs(quorumInfo, QUORUM_MAJORITY); @@ -109,9 +105,4 @@ public class TestBasicQuorumCommit { fail("Unexpected exception: " + e.getMessage()); } } - - private static WALEdit generateTestingWALEdit() { - KeyValue kv = KeyValue.createFirstOnRow(Bytes.toBytes("TestQuorum")); - return new WALEdit(Arrays.asList(kv)); - } } diff --git a/hbase-consensus/src/test/java/org/apache/hadoop/hbase/consensus/TestCommitDeadline.java b/hbase-consensus/src/test/java/org/apache/hadoop/hbase/consensus/TestCommitDeadline.java index 4527b0a..f3d5069 100644 --- a/hbase-consensus/src/test/java/org/apache/hadoop/hbase/consensus/TestCommitDeadline.java +++ b/hbase-consensus/src/test/java/org/apache/hadoop/hbase/consensus/TestCommitDeadline.java @@ -48,14 +48,14 @@ public class TestCommitDeadline { long deadline = HConstants.QUORUM_AGENT_COMMIT_DEADLINE_DEFAULT; // Do a transaction to make the QuorumClient lookup the leader. - client.replicateCommits(RAFT_TEST_UTIL.generateTransaction(1024)); + client.replicateCommits(RAFT_TEST_UTIL.generateTransaction(10, 1024)); leader = client.getLeader(); assertNotNull(leader); // A successful commit should complete within the set deadline. 
Stopwatch stopwatch = new Stopwatch(); //Stopwatch.createStarted(); assertTrue(leader.replicateCommit(quorumInfo.getQuorumName(), - RAFT_TEST_UTIL.generateTransaction(1024)) > 0); + RAFT_TEST_UTIL.generateTransaction(10, 1024)) > 0); assertTrue("The commit should complete within the deadline", stopwatch.elapsedTime(TimeUnit.MILLISECONDS) < deadline); @@ -71,7 +71,7 @@ public class TestCommitDeadline { Exception expectedException = null; try { leader.replicateCommit(quorumInfo.getQuorumName(), - RAFT_TEST_UTIL.generateTransaction(1024)); + RAFT_TEST_UTIL.generateTransaction(10, 1024)); } catch (Exception e) { expectedException = e; } diff --git a/hbase-consensus/src/test/java/org/apache/hadoop/hbase/consensus/TestLogFileViewer.java b/hbase-consensus/src/test/java/org/apache/hadoop/hbase/consensus/TestLogFileViewer.java index 6a13f8b..e8569ca 100644 --- a/hbase-consensus/src/test/java/org/apache/hadoop/hbase/consensus/TestLogFileViewer.java +++ b/hbase-consensus/src/test/java/org/apache/hadoop/hbase/consensus/TestLogFileViewer.java @@ -21,13 +21,12 @@ package org.apache.hadoop.hbase.consensus; import junit.framework.Assert; -import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.consensus.log.Edit; +import org.apache.hadoop.hbase.consensus.log.EditCodec; import org.apache.hadoop.hbase.consensus.log.LogFileViewer; -import org.apache.hadoop.hbase.consensus.log.LogReader; import org.apache.hadoop.hbase.consensus.log.LogWriter; -import org.apache.hadoop.hbase.io.hfile.Compression; -import org.apache.hadoop.hbase.regionserver.wal.WALEdit; -import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.consensus.log.QuorumEdit; +import org.apache.hadoop.hbase.consensus.log.QuorumEditCodec; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -37,15 +36,18 @@ import java.io.IOException; import java.io.RandomAccessFile; import java.util.Arrays; import java.util.List; +import java.util.Random; public class TestLogFileViewer { private static final Logger LOG = LoggerFactory.getLogger(TestLogFileViewer.class); + private Random random; + private EditCodec codec = new QuorumEditCodec(); @Test public void testViewer() throws IOException { + this.random = new Random(System.currentTimeMillis()); final int numTXNs = 100; - final KeyValue.KVComparator comparator = new KeyValue.KVComparator(); // Initialize the writer File file = new File("TestLogFileViewer"); @@ -59,28 +61,24 @@ public class TestLogFileViewer { // Write the numTXNs to the log file long curIndex, filePosition; - List txns; - WALEdit edit; + List txns; + Edit edit; for (curIndex = initialIndex; curIndex < numTXNs; curIndex++) { - edit = new WALEdit(); - edit.add(new KeyValue(Bytes.toBytes(curIndex), curIndex)); + edit = QuorumEdit.generateRandomQuorumEdit(random, 100); txns = Arrays.asList(edit); - writer.append(curIndex, WALEdit.serializeToByteBuffer(txns, 1234567890L, - Compression.Algorithm.NONE)); + writer.append(curIndex, codec.serializeToByteBuffer(txns, 1234567890L)); // Test the truncate for every 10 entries; if (curIndex % 10 == 0) { // Write some dummy data to be truncated filePosition = writer.getCurrentPosition(); - edit = new WALEdit(); - edit.add(new KeyValue(Bytes.toBytes("ToBeTruncated"), System.currentTimeMillis())); + edit = QuorumEdit.generateRandomQuorumEdit(random, 100); txns = Arrays.asList(edit); long tmpIndex = curIndex + 1; long tmpOffset = writer.append(tmpIndex, - WALEdit.serializeToByteBuffer(txns, 1234567890L, - Compression.Algorithm.NONE)); + 
codec.serializeToByteBuffer(txns, 1234567890L)); Assert.assertEquals(filePosition, tmpOffset); @@ -94,6 +92,6 @@ public class TestLogFileViewer { // Close the writer writer.close(); - LogFileViewer.dumpFileInfo(file, false); + LogFileViewer.dumpFileInfo(file, false, codec); } } diff --git a/hbase-consensus/src/test/java/org/apache/hadoop/hbase/consensus/TestLogWriterAndReader.java b/hbase-consensus/src/test/java/org/apache/hadoop/hbase/consensus/TestLogWriterAndReader.java index 87ae7b8..2a1de65 100644 --- a/hbase-consensus/src/test/java/org/apache/hadoop/hbase/consensus/TestLogWriterAndReader.java +++ b/hbase-consensus/src/test/java/org/apache/hadoop/hbase/consensus/TestLogWriterAndReader.java @@ -22,22 +22,26 @@ package org.apache.hadoop.hbase.consensus; import junit.framework.Assert; import org.apache.hadoop.hbase.HConstants; -import org.apache.hadoop.hbase.io.hfile.Compression; -import org.apache.hadoop.hbase.util.*; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.consensus.log.Edit; import org.apache.hadoop.hbase.consensus.log.LogReader; import org.apache.hadoop.hbase.consensus.log.LogWriter; -import org.apache.hadoop.hbase.regionserver.wal.WALEdit; +import org.apache.hadoop.hbase.consensus.log.QuorumEdit; +import org.apache.hadoop.hbase.consensus.log.QuorumEditCodec; +import org.apache.hadoop.hbase.util.Arena; +import org.apache.hadoop.hbase.util.BucketAllocator; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.InHeapArena; +import org.apache.hadoop.hbase.util.MemoryBuffer; import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.io.File; import java.io.IOException; import java.io.RandomAccessFile; -import java.nio.ByteBuffer; import java.util.Arrays; import java.util.List; +import java.util.Random; public class TestLogWriterAndReader { private static final Logger LOG = LoggerFactory.getLogger( @@ -46,10 +50,13 @@ public class TestLogWriterAndReader { private final Arena arena = new InHeapArena(BucketAllocator.DEFAULT_BUCKETS, HConstants.ARENA_CAPACITY_DEFAULT); + private QuorumEditCodec codec = new QuorumEditCodec(); + + private Random random = new Random(System.currentTimeMillis()); + @Test public void testSequentialWriteAndRead() throws IOException { final int numTXNs = 100; - final KeyValue.KVComparator comparator = new KeyValue.KVComparator(); // Initialize the writer File file = new File("testSequentialWriteAndRead"); @@ -63,28 +70,24 @@ public class TestLogWriterAndReader { // Write the numTXNs to the log file long curIndex, filePosition; - List txns; - WALEdit edit; + List txns; + Edit edit; for (curIndex = initialIndex; curIndex < numTXNs; curIndex++) { - edit = new WALEdit(); - edit.add(new KeyValue(Bytes.toBytes(curIndex), curIndex)); + edit = new QuorumEdit(Bytes.toBytes(curIndex)); txns = Arrays.asList(edit); - writer.append(curIndex, WALEdit.serializeToByteBuffer(txns, 1234567890L, - Compression.Algorithm.NONE)); + writer.append(curIndex, codec.serializeToByteBuffer(txns, 1234567890L)); // Test the truncate for every 10 entries; if (curIndex % 10 == 0) { // Write some dummy data to be truncated filePosition = writer.getCurrentPosition(); - edit = new WALEdit(); - edit.add(new KeyValue(Bytes.toBytes("ToBeTruncated"), System.currentTimeMillis())); + edit = new QuorumEdit(Bytes.toBytes("ToBeTruncated")); txns = Arrays.asList(edit); long tmpIndex = curIndex + 1; long tmpOffset = writer.append(tmpIndex, - 
WALEdit.serializeToByteBuffer(txns, 1234567890L, - Compression.Algorithm.NONE)); + codec.serializeToByteBuffer(txns, 1234567890L)); Assert.assertEquals(filePosition, tmpOffset); writer.truncate(tmpOffset); @@ -108,14 +111,12 @@ public class TestLogWriterAndReader { // Read the transactions and verify for (long i = initialIndex; i < numTXNs; i++) { MemoryBuffer buffer = reader.seekAndRead(i, arena); - txns = WALEdit.deserializeFromByteBuffer(buffer.getBuffer()); + txns = codec.deserializeFromByteBuffer(buffer.getBuffer()); edit = txns.get(0); - KeyValue kv = edit.getKeyValues().get(0); - KeyValue expectedKV = new KeyValue(Bytes.toBytes(i), i); Assert.assertEquals(1, txns.size()); - Assert.assertEquals(1, edit.size()); - Assert.assertEquals(0, comparator.compare(expectedKV, kv)); + + Assert.assertEquals(0, edit.compareTo(new QuorumEdit(Bytes.toBytes(i)))); arena.freeByteBuffer(buffer); } diff --git a/hbase-consensus/src/test/java/org/apache/hadoop/hbase/consensus/TestRandomAccessLog.java b/hbase-consensus/src/test/java/org/apache/hadoop/hbase/consensus/TestRandomAccessLog.java index 93ee82e..6ab745e 100644 --- a/hbase-consensus/src/test/java/org/apache/hadoop/hbase/consensus/TestRandomAccessLog.java +++ b/hbase-consensus/src/test/java/org/apache/hadoop/hbase/consensus/TestRandomAccessLog.java @@ -22,10 +22,10 @@ package org.apache.hadoop.hbase.consensus; import junit.framework.Assert; import org.apache.hadoop.hbase.HConstants; -import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.consensus.log.Edit; +import org.apache.hadoop.hbase.consensus.log.QuorumEdit; +import org.apache.hadoop.hbase.consensus.log.QuorumEditCodec; import org.apache.hadoop.hbase.consensus.log.RandomAccessLog; -import org.apache.hadoop.hbase.io.hfile.Compression; -import org.apache.hadoop.hbase.regionserver.wal.WALEdit; import org.apache.hadoop.hbase.util.Arena; import org.apache.hadoop.hbase.util.BucketAllocator; import org.apache.hadoop.hbase.util.Bytes; @@ -39,10 +39,13 @@ import java.io.File; import java.io.IOException; import java.util.Arrays; import java.util.List; +import java.util.Random; public class TestRandomAccessLog { File file; + private static Random random = new Random(System.currentTimeMillis()); + private static QuorumEditCodec codec = new QuorumEditCodec(); private final Arena arena = new InHeapArena(BucketAllocator.DEFAULT_BUCKETS, HConstants.ARENA_CAPACITY_DEFAULT); @@ -73,10 +76,9 @@ public class TestRandomAccessLog { final String readSessionKey = "test"; for (int i = startIndex; i <= endIndex; i++) { - WALEdit edit = new WALEdit(); - edit.add(new KeyValue(Bytes.toBytes("test" + i), System.currentTimeMillis())); - log.append(term, i, WALEdit.serializeToByteBuffer(Arrays.asList(edit), - 1234567890L, Compression.Algorithm.NONE)); + Edit edit = new QuorumEdit(Bytes.toBytes("test" + i)); + log.append(term, i, codec.serializeToByteBuffer(Arrays.asList(edit), + 1234567890L)); } Assert.assertEquals(term, log.getCurrentTerm()); @@ -102,11 +104,10 @@ public class TestRandomAccessLog { for (int i = startIndex; i <= middleIndex; i++) { MemoryBuffer buffer = log2.getTransaction(term, i, readSessionKey, arena); - List txns = WALEdit.deserializeFromByteBuffer(buffer.getBuffer()); + List txns = codec.deserializeFromByteBuffer(buffer.getBuffer()); Assert.assertEquals(1, txns.size()); - Assert.assertEquals(1, txns.get(0).getKeyValues().size()); - byte[] row = txns.get(0).getKeyValues().get(0).getRow(); - Assert.assertEquals(0, Bytes.compareTo(Bytes.toBytes("test" + i), row)); + 
Assert.assertEquals(0, txns.get(0).compareTo( + new QuorumEdit(Bytes.toBytes("test" + i)))); arena.freeByteBuffer(buffer); } } diff --git a/hbase-consensus/src/test/java/org/apache/hadoop/hbase/consensus/TestReadOnlyLog.java b/hbase-consensus/src/test/java/org/apache/hadoop/hbase/consensus/TestReadOnlyLog.java index f3f8484..8bf0b3d 100644 --- a/hbase-consensus/src/test/java/org/apache/hadoop/hbase/consensus/TestReadOnlyLog.java +++ b/hbase-consensus/src/test/java/org/apache/hadoop/hbase/consensus/TestReadOnlyLog.java @@ -22,12 +22,12 @@ package org.apache.hadoop.hbase.consensus; import junit.framework.Assert; import org.apache.hadoop.hbase.HConstants; -import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.consensus.log.Edit; import org.apache.hadoop.hbase.consensus.log.LogWriter; +import org.apache.hadoop.hbase.consensus.log.QuorumEdit; +import org.apache.hadoop.hbase.consensus.log.QuorumEditCodec; import org.apache.hadoop.hbase.consensus.log.RandomAccessLog; import org.apache.hadoop.hbase.consensus.log.ReadOnlyLog; -import org.apache.hadoop.hbase.io.hfile.Compression; -import org.apache.hadoop.hbase.regionserver.wal.WALEdit; import org.apache.hadoop.hbase.util.Arena; import org.apache.hadoop.hbase.util.BucketAllocator; import org.apache.hadoop.hbase.util.Bytes; @@ -56,20 +56,20 @@ public class TestReadOnlyLog { private static final int TOTAL_COMMIT = 100; private static final long INITIAL_TERM = 1; private static final long INITIAL_INDEX = 1; - private static final KeyValue.KVComparator comparator = new KeyValue.KVComparator(); private static final int CONCURRENT_READER_CNT = 10; private final AtomicInteger SUCCESS_CNT = new AtomicInteger(0); private static Random random; private static File file; private static ReadOnlyLog readOnlyLog; + private static QuorumEditCodec codec = new QuorumEditCodec(); private final Arena arena = new InHeapArena(BucketAllocator.DEFAULT_BUCKETS, HConstants.ARENA_CAPACITY_DEFAULT); @BeforeClass public static void setUpBeforeClass() throws Exception { - random = new Random(); + random = new Random(System.currentTimeMillis()); file = new File("TestReadOnlyLog_" + INITIAL_INDEX + "_" + INITIAL_INDEX); file.createNewFile(); readOnlyLog = new ReadOnlyLog(file, INITIAL_TERM, INITIAL_INDEX); @@ -117,16 +117,14 @@ public class TestReadOnlyLog { MemoryBuffer buffer = readOnlyLog.getTransaction(INITIAL_TERM, i, sessionKey, arena); // Read the commit entry - List txns = WALEdit.deserializeFromByteBuffer(buffer.getBuffer()); + List txns = codec.deserializeFromByteBuffer(buffer.getBuffer()); - WALEdit edit = txns.get(0); - KeyValue kv = edit.getKeyValues().get(0); - KeyValue expectedKV = new KeyValue(Bytes.toBytes(i), i); + Edit edit = txns.get(0); // Verify the commit entry Assert.assertEquals(1, txns.size()); - Assert.assertEquals(1, edit.size()); - Assert.assertEquals(0, comparator.compare(expectedKV, kv)); + Assert.assertEquals(0, edit.compareTo(new QuorumEdit( + Bytes.toBytes(i)))); arena.freeByteBuffer(buffer); } @@ -151,14 +149,12 @@ public class TestReadOnlyLog { writer.writeFileHeader(term, initialIndex); // Write the numTXNs to the log file - List txns; - WALEdit edit; + List txns; + Edit edit; for (long i = initialIndex; i < TOTAL_COMMIT; i++) { - edit = new WALEdit(); - edit.add(new KeyValue(Bytes.toBytes(i), i)); + edit = new QuorumEdit(Bytes.toBytes(i)); txns = Arrays.asList(edit); - writer.append(i, WALEdit.serializeToByteBuffer(txns, 1234567890L, - Compression.Algorithm.NONE)); + writer.append(i, codec.serializeToByteBuffer(txns, 
1234567890L)); } // Close the writer diff --git a/hbase-consensus/src/test/java/org/apache/hadoop/hbase/consensus/fsm/TestAsyncStatesInRaftStateMachine.java b/hbase-consensus/src/test/java/org/apache/hadoop/hbase/consensus/fsm/TestAsyncStatesInRaftStateMachine.java index 3631129..14bb5c4 100644 --- a/hbase-consensus/src/test/java/org/apache/hadoop/hbase/consensus/fsm/TestAsyncStatesInRaftStateMachine.java +++ b/hbase-consensus/src/test/java/org/apache/hadoop/hbase/consensus/fsm/TestAsyncStatesInRaftStateMachine.java @@ -23,16 +23,14 @@ package org.apache.hadoop.hbase.consensus.fsm; import com.google.common.util.concurrent.SettableFuture; import junit.framework.Assert; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.consensus.RaftTestUtil; import org.apache.hadoop.hbase.consensus.client.QuorumClient; +import org.apache.hadoop.hbase.consensus.log.QuorumEdit; import org.apache.hadoop.hbase.consensus.quorum.QuorumInfo; import org.apache.hadoop.hbase.consensus.quorum.RaftQuorumContext; import org.apache.hadoop.hbase.consensus.raft.states.RaftStateType; import org.apache.hadoop.hbase.consensus.server.LocalConsensusServer; import org.apache.hadoop.hbase.consensus.server.peer.PeerServer; -import org.apache.hadoop.hbase.regionserver.wal.WALEdit; -import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.Threads; import org.junit.After; import org.junit.Test; @@ -49,7 +47,6 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import static junit.framework.Assert.assertEquals; -import static junit.framework.Assert.assertTrue; import static junit.framework.Assert.fail; public class TestAsyncStatesInRaftStateMachine { @@ -243,7 +240,7 @@ public class TestAsyncStatesInRaftStateMachine { RAFT_TEST_UTIL.dumpStates(quorumInfo); for (int i = 0; i < numCommits; i++) { - client.replicateCommits(Arrays.asList(generateTestingWALEdit())); + client.replicateCommits(Arrays.asList(generateTestingEdit())); } RAFT_TEST_UTIL.dumpStates(quorumInfo); @@ -253,8 +250,7 @@ public class TestAsyncStatesInRaftStateMachine { } } - private static WALEdit generateTestingWALEdit() { - KeyValue kv = KeyValue.createFirstOnRow(Bytes.toBytes("TestQuorum")); - return new WALEdit(Arrays.asList(kv)); + private static QuorumEdit generateTestingEdit() { + return RAFT_TEST_UTIL.generateTransaction(1, 100).get(0); } } diff --git a/hbase-consensus/src/test/java/org/apache/hadoop/hbase/consensus/log/TestQuorumEditCodec.java b/hbase-consensus/src/test/java/org/apache/hadoop/hbase/consensus/log/TestQuorumEditCodec.java new file mode 100644 index 0000000..6f9d764 --- /dev/null +++ b/hbase-consensus/src/test/java/org/apache/hadoop/hbase/consensus/log/TestQuorumEditCodec.java @@ -0,0 +1,23 @@ +package org.apache.hadoop.hbase.consensus.log; + +import junit.framework.Assert; +import org.apache.hadoop.hbase.util.Bytes; +import org.junit.Test; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Arrays; + +public class TestQuorumEditCodec { + private static QuorumEditCodec codec = new QuorumEditCodec(); + + @Test + public void testBasic() throws IOException { + Edit edit = new QuorumEdit(Bytes.toBytes("test")); + ByteBuffer buffer = codec.serializeToByteBuffer(Arrays.asList(edit), + 1234567890L); + Assert.assertEquals(0, + codec.deserializeFromByteBuffer(buffer).get(0).compareTo( + new QuorumEdit(Bytes.toBytes("test")))); + } +} -- 2.1.0
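For reviewers, a minimal sketch (not part of the patch) of how a caller is expected to round-trip edits through the new codec, based only on the calls exercised above in TestQuorumEditCodec, TestLogWriterAndReader and RaftTestUtil. The class name QuorumEditCodecExample, the main-method harness, and the List<Edit> return type of deserializeFromByteBuffer are assumptions for illustration; the QuorumEdit(byte[]) constructor, serializeToByteBuffer(List, long) and compareTo usage are as they appear in the diff.

import org.apache.hadoop.hbase.consensus.log.Edit;
import org.apache.hadoop.hbase.consensus.log.QuorumEdit;
import org.apache.hadoop.hbase.consensus.log.QuorumEditCodec;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.List;

public class QuorumEditCodecExample {
  public static void main(String[] args) throws IOException {
    QuorumEditCodec codec = new QuorumEditCodec();

    // Build a single edit and serialize it with a commit timestamp, mirroring
    // the call sites that replaced WALEdit.serializeToByteBuffer(...) in this patch.
    Edit edit = new QuorumEdit(Bytes.toBytes("TestQuorum"));
    ByteBuffer buffer = codec.serializeToByteBuffer(
        Arrays.asList(edit), System.currentTimeMillis());

    // Read the edits back and verify the round trip, as TestQuorumEditCodec does.
    // Assumption: deserializeFromByteBuffer returns List<Edit>.
    List<Edit> roundTripped = codec.deserializeFromByteBuffer(buffer);
    System.out.println("edits read back: " + roundTripped.size());
    System.out.println("round trip matches: "
        + (roundTripped.get(0).compareTo(
            new QuorumEdit(Bytes.toBytes("TestQuorum"))) == 0));
  }
}

The same serialize/deserialize pair is what the tests above substitute for the removed WALEdit and KeyValue machinery, so the codec is the single point of change when the on-disk encoding is swapped in a later diff.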