diff --git hbase-client/src/main/java/org/apache/hadoop/hbase/client/RegionReplicaUtil.java hbase-client/src/main/java/org/apache/hadoop/hbase/client/RegionReplicaUtil.java index abe9bf5..fbc6726 100644 --- hbase-client/src/main/java/org/apache/hadoop/hbase/client/RegionReplicaUtil.java +++ hbase-client/src/main/java/org/apache/hadoop/hbase/client/RegionReplicaUtil.java @@ -62,4 +62,22 @@ public class RegionReplicaUtil { return getRegionInfoForReplica(regionInfo, DEFAULT_REPLICA_ID); } + /** + * Returns whether this region replica is read only, i.e. whether it cannot accept writes. + * @param regionInfo the regionInfo for the region replica. + * @return true if the replica is read only, i.e. it is not the default (primary) replica + */ + public static boolean isReadOnly(HRegionInfo regionInfo) { + return regionInfo.getReplicaId() != DEFAULT_REPLICA_ID; + } + + /** + * Returns the regionInfo object to use for interacting with the file system. By default + * the region replicas return the primary region's info object so that they can refer to + * its files. + * @return An HRegionInfo object to interact with the filesystem + */ + public static HRegionInfo getRegionInfoForFs(HRegionInfo regionInfo) { + return getRegionInfoForDefaultReplica(regionInfo); + } } diff --git hbase-common/src/main/resources/hbase-default.xml hbase-common/src/main/resources/hbase-default.xml index e2f1ecd..b036af1 100644 --- hbase-common/src/main/resources/hbase-default.xml +++ hbase-common/src/main/resources/hbase-default.xml @@ -1150,4 +1150,12 @@ possible configurations would overwhelm and obscure the important. procedure. After implementing your own MasterProcedureManager, just put it in HBase's classpath and add the fully qualified class name here. + + hbase.regionserver.storefile.refresh.period + 0 + + The period (in milliseconds) for refreshing the store files for the secondary regions. 0 means this + feature is disabled. 
+ + diff --git hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java index ecb4996..5dd7464 100644 --- hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java +++ hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java @@ -92,6 +92,7 @@ import org.apache.hadoop.hbase.client.Increment; import org.apache.hadoop.hbase.client.IsolationLevel; import org.apache.hadoop.hbase.client.Mutation; import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.client.RegionReplicaUtil; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.RowMutations; import org.apache.hadoop.hbase.client.Scan; @@ -195,7 +196,7 @@ public class HRegion implements HeapSize { // , Writable{ public static final String LOAD_CFS_ON_DEMAND_CONFIG_KEY = "hbase.hregion.scan.loadColumnFamiliesOnDemand"; - + /** * This is the global default value for durability. All tables/mutations not * defining a durability or using USE_DEFAULT will default to this value. @@ -399,7 +400,7 @@ public class HRegion implements HeapSize { // , Writable{ private RegionServerAccounting rsAccounting; private List> recentFlushes = new ArrayList>(); private long flushCheckInterval; - // flushPerChanges is to prevent too many changes in memstore + // flushPerChanges is to prevent too many changes in memstore private long flushPerChanges; private long blockingMemStoreSize; final long threadWakeFrequency; @@ -426,6 +427,8 @@ public class HRegion implements HeapSize { // , Writable{ private final MetricsRegionWrapperImpl metricsRegionWrapper; private final Durability durability; + private volatile boolean isStale; // whether the region replica is too stale to serve reads + /** * HRegion constructor. This constructor should only be used for testing and * extensions. 
Instances of HRegion should be instantiated with the @@ -499,7 +502,7 @@ public class HRegion implements HeapSize { // , Writable{ throw new IllegalArgumentException(MEMSTORE_FLUSH_PER_CHANGES + " can not exceed " + MAX_FLUSH_PER_CHANGES); } - + this.rowLockWaitDuration = conf.getInt("hbase.rowlock.wait.duration", DEFAULT_ROWLOCK_WAIT_DURATION); @@ -644,7 +647,9 @@ public class HRegion implements HeapSize { // , Writable{ fs.cleanupAnySplitDetritus(); fs.cleanupMergesDir(); - this.writestate.setReadOnly(this.htableDescriptor.isReadOnly()); + this.writestate.setReadOnly( + this.htableDescriptor.isReadOnly() + || RegionReplicaUtil.isReadOnly(this.getRegionInfo())); this.writestate.flushRequested = false; this.writestate.compacting = 0; @@ -737,7 +742,7 @@ public class HRegion implements HeapSize { // , Writable{ for (Store store : this.stores.values()) { try { store.close(); - } catch (IOException e) { + } catch (IOException e) { LOG.warn(e.getMessage()); } } @@ -2018,6 +2023,7 @@ public class HRegion implements HeapSize { // , Writable{ this.nonce = nonce; } + @Override public Mutation getMutation(int index) { return this.operations[index]; } @@ -3885,7 +3891,7 @@ public class HRegion implements HeapSize { // , Writable{ if (filter != null && filter.hasFilterRow()) { filter.filterRowCells(results); } - + if (isEmptyRow || filterRow()) { results.clear(); boolean moreRows = nextRow(currentRow, offset, length); @@ -3953,7 +3959,7 @@ public class HRegion implements HeapSize { // , Writable{ return filter != null && (!filter.hasFilterRow()) && filter.filterRow(); } - + private boolean filterRowKey(byte[] row, int offset, short length) throws IOException { return filter != null && filter.filterRowKey(row, offset, length); @@ -5624,7 +5630,7 @@ public class HRegion implements HeapSize { // , Writable{ * modifies data. It has to be called just before a try. 
* #closeRegionOperation needs to be called in the try's finally block * Acquires a read lock and checks if the region is closing or closed. - * @throws IOException + * @throws IOException */ public void startRegionOperation() throws IOException { startRegionOperation(Operation.ANY); @@ -5632,7 +5638,7 @@ public class HRegion implements HeapSize { // , Writable{ /** * @param op The operation is about to be taken on the region - * @throws IOException + * @throws IOException */ protected void startRegionOperation(Operation op) throws IOException { switch (op) { @@ -5651,6 +5657,9 @@ public class HRegion implements HeapSize { // , Writable{ (op != Operation.PUT && op != Operation.DELETE && op != Operation.BATCH_MUTATE))) { throw new RegionInRecoveryException(this.getRegionNameAsString() + " is recovering"); } + if (this.isStale) { + throw new IOException ("The region's files are stale. Cannot serve the request"); + } break; default: break; @@ -5682,7 +5691,7 @@ public class HRegion implements HeapSize { // , Writable{ /** * Closes the lock. 
This needs to be called in the finally block corresponding * to the try block of #startRegionOperation - * @throws IOException + * @throws IOException */ public void closeRegionOperation() throws IOException { closeRegionOperation(Operation.ANY); @@ -5943,6 +5952,10 @@ public class HRegion implements HeapSize { // , Writable{ this.sequenceId.set(value); } + public void setStale(boolean isStale) { + this.isStale = isStale; + } + /** * Listener class to enable callers of * bulkLoadHFile() to perform any necessary diff --git hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionFileSystem.java hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionFileSystem.java index 2929f47..512a0cc 100644 --- hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionFileSystem.java +++ hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionFileSystem.java @@ -45,6 +45,7 @@ import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.backup.HFileArchiver; +import org.apache.hadoop.hbase.client.RegionReplicaUtil; import org.apache.hadoop.hbase.fs.HFileSystem; import org.apache.hadoop.hbase.io.Reference; import org.apache.hadoop.hbase.util.Bytes; @@ -72,10 +73,12 @@ public class HRegionFileSystem { private static final String REGION_TEMP_DIR = ".tmp"; private final HRegionInfo regionInfo; + //regionInfo for interacting with FS (getting encodedName, etc) + private final HRegionInfo regionInfoForFs; private final Configuration conf; private final Path tableDir; private final FileSystem fs; - + /** * In order to handle NN connectivity hiccups, one need to retry non-idempotent operation at the * client level. 
@@ -98,6 +101,10 @@ public class HRegionFileSystem { this.conf = conf; this.tableDir = tableDir; this.regionInfo = regionInfo; + if (regionInfo != null) { + // refer to the primary replica on disk + this.regionInfoForFs = RegionReplicaUtil.getRegionInfoForFs(regionInfo); + } else this.regionInfoForFs = null; this.hdfsClientRetriesNumber = conf.getInt("hdfs.client.retries.number", DEFAULT_HDFS_CLIENT_RETRIES_NUMBER); this.baseSleepBeforeRetries = conf.getInt("hdfs.client.sleep.before.retries", @@ -121,7 +128,7 @@ public class HRegionFileSystem { /** @return {@link Path} to the region directory. */ public Path getRegionDir() { - return new Path(this.tableDir, this.regionInfo.getEncodedName()); + return new Path(this.tableDir, this.regionInfoForFs.getEncodedName()); } // =========================================================================== @@ -203,6 +210,7 @@ public class HRegionFileSystem { public boolean hasReferences(final String familyName) throws IOException { FileStatus[] files = FSUtils.listStatus(fs, getStoreDir(familyName), new PathFilter () { + @Override public boolean accept(Path path) { return StoreFileInfo.isReference(path); } @@ -249,14 +257,14 @@ public class HRegionFileSystem { */ public void deleteFamily(final String familyName) throws IOException { // archive family store files - HFileArchiver.archiveFamily(fs, conf, regionInfo, tableDir, Bytes.toBytes(familyName)); + HFileArchiver.archiveFamily(fs, conf, regionInfoForFs, tableDir, Bytes.toBytes(familyName)); // delete the family folder Path familyDir = getStoreDir(familyName); if(fs.exists(familyDir) && !deleteDir(familyDir)) throw new IOException("Could not delete family " + familyName - + " from FileSystem for region " + regionInfo.getRegionNameAsString() + "(" - + regionInfo.getEncodedName() + ")"); + + " from FileSystem for region " + regionInfoForFs.getRegionNameAsString() + "(" + + regionInfoForFs.getEncodedName() + ")"); } /** @@ -326,7 +334,7 @@ public class HRegionFileSystem { Path 
storeDir = getStoreDir(familyName); if(!fs.exists(storeDir) && !createDir(storeDir)) throw new IOException("Failed creating " + storeDir); - + String name = buildPath.getName(); if (generateNewName) { name = generateUniqueName((seqNum < 0) ? null : "_SeqId_" + seqNum + "_"); @@ -366,7 +374,7 @@ public class HRegionFileSystem { */ public void removeStoreFile(final String familyName, final Path filePath) throws IOException { - HFileArchiver.archiveStoreFile(this.conf, this.fs, this.regionInfo, + HFileArchiver.archiveStoreFile(this.conf, this.fs, this.regionInfoForFs, this.tableDir, Bytes.toBytes(familyName), filePath); } @@ -378,7 +386,7 @@ public class HRegionFileSystem { */ public void removeStoreFiles(final String familyName, final Collection storeFiles) throws IOException { - HFileArchiver.archiveStoreFiles(this.conf, this.fs, this.regionInfo, + HFileArchiver.archiveStoreFiles(this.conf, this.fs, this.regionInfoForFs, this.tableDir, Bytes.toBytes(familyName), storeFiles); } @@ -528,16 +536,16 @@ public class HRegionFileSystem { */ Path splitStoreFile(final HRegionInfo hri, final String familyName, final StoreFile f, final byte[] splitRow, final boolean top) throws IOException { - + // Check whether the split row lies in the range of the store file // If it is outside the range, return directly. if (top) { //check if larger than last key. KeyValue splitKey = KeyValue.createFirstOnRow(splitRow); - byte[] lastKey = f.createReader().getLastKey(); + byte[] lastKey = f.createReader().getLastKey(); // If lastKey is null means storefile is empty. 
if (lastKey == null) return null; - if (f.getReader().getComparator().compareFlatKey(splitKey.getBuffer(), + if (f.getReader().getComparator().compareFlatKey(splitKey.getBuffer(), splitKey.getKeyOffset(), splitKey.getKeyLength(), lastKey, 0, lastKey.length) > 0) { return null; } @@ -547,14 +555,14 @@ public class HRegionFileSystem { byte[] firstKey = f.createReader().getFirstKey(); // If firstKey is null means storefile is empty. if (firstKey == null) return null; - if (f.getReader().getComparator().compareFlatKey(splitKey.getBuffer(), + if (f.getReader().getComparator().compareFlatKey(splitKey.getBuffer(), splitKey.getKeyOffset(), splitKey.getKeyLength(), firstKey, 0, firstKey.length) < 0) { return null; - } + } } - + f.getReader().close(true); - + Path splitDir = new Path(getSplitsDir(hri), familyName); // A reference to the bottom half of the hsf store file. Reference r = @@ -563,7 +571,7 @@ public class HRegionFileSystem { // See REF_NAME_REGEX regex above. The referred-to regions name is // up in the path of the passed in f -- parentdir is family, // then the directory above is the region name. - String parentRegionName = regionInfo.getEncodedName(); + String parentRegionName = regionInfoForFs.getEncodedName(); // Write reference with same file id only with the other region name as // suffix and into the new region location (under same family). Path p = new Path(splitDir, f.getPath().getName() + "." + parentRegionName); @@ -636,12 +644,12 @@ public class HRegionFileSystem { Path referenceDir = new Path(new Path(mergedDir, mergedRegion.getEncodedName()), familyName); // A whole reference to the store file. - Reference r = Reference.createTopReference(regionInfo.getStartKey()); + Reference r = Reference.createTopReference(regionInfoForFs.getStartKey()); // Add the referred-to regions name as a dot separated suffix. // See REF_NAME_REGEX regex above. 
The referred-to regions name is // up in the path of the passed in f -- parentdir is family, // then the directory above is the region name. - String mergingRegionName = regionInfo.getEncodedName(); + String mergingRegionName = regionInfoForFs.getEncodedName(); // Write reference with same file id only with the other region name as // suffix and into the new region location (under same family). Path p = new Path(referenceDir, f.getPath().getName() + "." @@ -653,7 +661,7 @@ public class HRegionFileSystem { * Commit a merged region, moving it from the merges temporary directory to * the proper location in the filesystem. * @param mergedRegionInfo merged region {@link HRegionInfo} - * @throws IOException + * @throws IOException */ void commitMergedRegion(final HRegionInfo mergedRegionInfo) throws IOException { Path regionDir = new Path(this.tableDir, mergedRegionInfo.getEncodedName()); @@ -731,7 +739,7 @@ public class HRegionFileSystem { // pb version is much shorter -- we write now w/o the toString version -- so checking length // only should be sufficient. I don't want to read the file every time to check if it pb // serialized. - byte[] content = getRegionInfoFileContent(regionInfo); + byte[] content = getRegionInfoFileContent(regionInfoForFs); try { Path regionInfoFile = new Path(getRegionDir(), REGION_INFO_FILE); @@ -747,7 +755,7 @@ public class HRegionFileSystem { throw new IOException("Unable to remove existing " + regionInfoFile); } } catch (FileNotFoundException e) { - LOG.warn(REGION_INFO_FILE + " file not found for region: " + regionInfo.getEncodedName()); + LOG.warn(REGION_INFO_FILE + " file not found for region: " + regionInfoForFs.getEncodedName()); } // Write HRI to a file in case we need to recover hbase:meta @@ -759,7 +767,7 @@ public class HRegionFileSystem { * @param useTempDir indicate whether or not using the region .tmp dir for a safer file creation. 
*/ private void writeRegionInfoOnFilesystem(boolean useTempDir) throws IOException { - byte[] content = getRegionInfoFileContent(regionInfo); + byte[] content = getRegionInfoFileContent(regionInfoForFs); writeRegionInfoOnFilesystem(content, useTempDir); } diff --git hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java index 039ced7..465d691 100644 --- hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java +++ hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java @@ -50,7 +50,6 @@ import java.util.concurrent.locks.ReentrantReadWriteLock; import javax.management.ObjectName; -import com.google.protobuf.HBaseZeroCopyByteString; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; @@ -235,6 +234,7 @@ import org.cliffc.high_scale_lib.Counter; import com.google.protobuf.BlockingRpcChannel; import com.google.protobuf.ByteString; +import com.google.protobuf.HBaseZeroCopyByteString; import com.google.protobuf.Message; import com.google.protobuf.RpcController; import com.google.protobuf.ServiceException; @@ -482,6 +482,9 @@ public class HRegionServer implements ClientProtos.ClientService.BlockingInterfa */ private final int scannerLeaseTimeoutPeriod; + // chore for refreshing store files for secondary regions + private StorefileRefresherChore storefileRefresher; + /** * The reference to the priority extraction function */ @@ -821,6 +824,12 @@ public class HRegionServer implements ClientProtos.ClientService.BlockingInterfa this.isa.getAddress(), 0)); this.pauseMonitor = new JvmPauseMonitor(conf); pauseMonitor.start(); + + int storefileRefreshPeriod = conf.getInt(StorefileRefresherChore.REGIONSERVER_STOREFILE_REFRESH_PERIOD + , StorefileRefresherChore.DEFAULT_REGIONSERVER_STOREFILE_REFRESH_PERIOD); + if 
(storefileRefreshPeriod > 0) { + this.storefileRefresher = new StorefileRefresherChore(storefileRefreshPeriod, this, this); + } } /** @@ -946,6 +955,12 @@ public class HRegionServer implements ClientProtos.ClientService.BlockingInterfa if (this.nonceManagerChore != null) { this.nonceManagerChore.interrupt(); } + if (this.healthCheckChore != null) { + this.healthCheckChore.interrupt(); + } + if (this.storefileRefresher != null) { + this.storefileRefresher.interrupt(); + } // Stop the snapshot and other procedure handlers, forcefully killing all running tasks rspmHost.stop(this.abortRequested || this.killed); @@ -1608,6 +1623,10 @@ public class HRegionServer implements ClientProtos.ClientService.BlockingInterfa Threads.setDaemonThreadRunning(this.nonceManagerChore.getThread(), n + ".nonceCleaner", uncaughtExceptionHandler); } + if (this.storefileRefresher != null) { + Threads.setDaemonThreadRunning(this.storefileRefresher.getThread(), n + ".storefileRefresher", + uncaughtExceptionHandler); + } // Leases is not a Thread. Internally it runs a daemon thread. If it gets // an unhandled exception, it will just exit. 
@@ -1894,6 +1913,9 @@ public class HRegionServer implements ClientProtos.ClientService.BlockingInterfa this.replicationSinkHandler.stopReplicationService(); } } + if (this.storefileRefresher != null) { + Threads.shutdown(this.storefileRefresher.getThread()); + } } /** diff --git hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java index f7dfb17..4df3df4 100644 --- hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java +++ hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java @@ -26,6 +26,8 @@ import java.security.KeyException; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.NavigableSet; @@ -90,6 +92,7 @@ import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableCollection; import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; +import com.google.common.collect.Sets; /** * A Store holds a column family in a Region. Its a memstore and a set of zero @@ -474,10 +477,13 @@ public class HStore implements Store { */ private List loadStoreFiles() throws IOException { Collection files = fs.getStoreFiles(getColumnFamilyName()); + return openStoreFiles(files); + } + + private List openStoreFiles(Collection files) throws IOException { if (files == null || files.size() == 0) { return new ArrayList(); } - // initialize the thread pool for opening store files in parallel.. 
ThreadPoolExecutor storeFileOpenerThreadPool = this.region.getStoreFileOpenAndCloseThreadPool("StoreFileOpenerThread-" + @@ -527,7 +533,7 @@ public class HStore implements Store { for (StoreFile file : results) { try { if (file != null) file.closeReader(true); - } catch (IOException e) { + } catch (IOException e) { LOG.warn(e.getMessage()); } } @@ -537,6 +543,53 @@ public class HStore implements Store { return results; } + /** + * Checks the underlying store files, and opens the files that have not + * been opened, and removes the store file readers for store files no longer + * available. Mainly used by secondary region replicas to keep up to date with + * the primary region files. + * @throws IOException if listing the store files or opening the new ones fails + */ + @Override + public void refreshStoreFiles() throws IOException { + StoreFileManager sfm = storeEngine.getStoreFileManager(); + Collection currentFiles = sfm.getStorefiles(); + if (currentFiles == null) currentFiles = new ArrayList(0); + + Collection newFiles = fs.getStoreFiles(getColumnFamilyName()); + if (newFiles == null) newFiles = new ArrayList(0); + + HashMap currentFilesSet = new HashMap(currentFiles.size()); + for (StoreFile sf : currentFiles) { + currentFilesSet.put(sf.getFileInfo(), sf); + } + HashSet newFilesSet = new HashSet(newFiles); + + Set toBeAddedFiles = Sets.difference(newFilesSet, currentFilesSet.keySet()); + Set toBeRemovedFiles = Sets.difference(currentFilesSet.keySet(), newFilesSet); + + if (toBeAddedFiles.isEmpty() && toBeRemovedFiles.isEmpty()) { + return; + } + + LOG.info("Refreshing store files for region " + this.getRegionInfo().getRegionNameAsString() + + " files to add: " + toBeAddedFiles + " files to remove: " + toBeRemovedFiles); + + Set toBeRemovedStoreFiles = new HashSet(toBeRemovedFiles.size()); + for (StoreFileInfo sfi : toBeRemovedFiles) { + toBeRemovedStoreFiles.add(currentFilesSet.get(sfi)); + } + + // try to open the files + List openedFiles = openStoreFiles(toBeAddedFiles); + + // propagate the file changes to the 
underlying store file manager + replaceStoreFiles(toBeRemovedStoreFiles, openedFiles); //won't throw an exception + + // notify scanners, close file readers, and recompute store size + completeCompaction(toBeRemovedStoreFiles, false); + } + private StoreFile createStoreFileAndReader(final Path p) throws IOException { StoreFileInfo info = new StoreFileInfo(conf, this.getFileSystem(), p); info.setRegionCoprocessorHost(this.region.getCoprocessorHost()); @@ -1094,7 +1147,7 @@ public class HStore implements Store { writeCompactionWalRecord(filesToCompact, sfs); replaceStoreFiles(filesToCompact, sfs); // At this point the store will use new files for all new scanners. - completeCompaction(filesToCompact); // Archive old files & update store size. + completeCompaction(filesToCompact, true); // Archive old files & update store size. } finally { finishCompactionRequest(cr); } @@ -1148,7 +1201,8 @@ public class HStore implements Store { this.region.getRegionInfo(), compactionDescriptor, this.region.getSequenceId()); } - private void replaceStoreFiles(final Collection compactedFiles, + @VisibleForTesting + void replaceStoreFiles(final Collection compactedFiles, final Collection result) throws IOException { this.lock.writeLock().lock(); try { @@ -1301,7 +1355,7 @@ public class HStore implements Store { this.getCoprocessorHost().postCompact(this, sf, null); } replaceStoreFiles(filesToCompact, Lists.newArrayList(sf)); - completeCompaction(filesToCompact); + completeCompaction(filesToCompact, true); } } finally { synchronized (filesCompacting) { @@ -1483,6 +1537,28 @@ public class HStore implements Store { */ @VisibleForTesting protected void completeCompaction(final Collection compactedFiles) + throws IOException { + completeCompaction(compactedFiles, true); + } + + + /** + * <p>It works by processing a compaction that's been written to disk. + * + * <p>It is usually invoked at the end of a compaction, but might also be + * invoked at HStore startup, if the prior execution died midway through. + * + * <p>Moving the compacted TreeMap into place means: + * <pre> + * 1) Unload all replaced StoreFile, close and collect list to delete. + * 2) Compute new store size + * </pre> + * + * @param compactedFiles list of files that were compacted + * @param removeFiles whether to remove the compacted files from the file system + */ + @VisibleForTesting + protected void completeCompaction(final Collection compactedFiles, boolean removeFiles) throws IOException { try { // Do not delete old store files until we have sent out notification of @@ -1497,7 +1573,9 @@ public class HStore implements Store { for (StoreFile compactedFile : compactedFiles) { compactedFile.closeReader(true); } - this.fs.removeStoreFiles(this.getColumnFamilyName(), compactedFiles); + if (removeFiles) { + this.fs.removeStoreFiles(this.getColumnFamilyName(), compactedFiles); + } } catch (IOException e) { e = RemoteExceptionHandler.checkIOException(e); LOG.error("Failed removing compacted files in " + this + diff --git hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/Store.java hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/Store.java index 798979b..2deee3b 100644 --- hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/Store.java +++ hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/Store.java @@ -27,10 +27,10 @@ import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.Cell; -import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.HColumnDescriptor; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.io.HeapSize; import org.apache.hadoop.hbase.io.compress.Compression; @@ -349,4 +349,13 @@ public interface Store extends HeapSize, StoreConfigInformation { * @return Whether this store has too many store files. 
*/ boolean hasTooManyStoreFiles(); + + /** + * Checks the underlying store files, and opens the files that have not + * been opened, and removes the store file readers for store files no longer + * available. Mainly used by secondary region replicas to keep up to date with + * the primary region files. + * @throws IOException + */ + void refreshStoreFiles() throws IOException; } diff --git hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileInfo.java hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileInfo.java index 2d8a8ea..6d0c714 100644 --- hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileInfo.java +++ hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileInfo.java @@ -43,7 +43,7 @@ import org.apache.hadoop.hbase.util.FSUtils; * Describe a StoreFile (hfile, reference, link) */ @InterfaceAudience.Private -public class StoreFileInfo { +public class StoreFileInfo implements Comparable { public static final Log LOG = LogFactory.getLog(StoreFileInfo.class); /** @@ -390,4 +390,27 @@ public class StoreFileInfo { } return FSUtils.computeHDFSBlocksDistribution(fs, status, start, length); } + + @Override + public boolean equals(Object that) { + if (that == null) { + return false; + } + + if (that instanceof StoreFileInfo) { + return this.compareTo((StoreFileInfo)that) == 0; + } + + return false; + }; + + @Override + public int compareTo(StoreFileInfo o) { + return this.fileStatus.compareTo(o.fileStatus); + } + + @Override + public int hashCode() { + return this.fileStatus.hashCode(); + } } diff --git hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StorefileRefresherChore.java hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StorefileRefresherChore.java new file mode 100644 index 0000000..f9d0e44 --- /dev/null +++ hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StorefileRefresherChore.java @@ -0,0 +1,113 @@ +/** + * Licensed to the Apache 
Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.regionserver; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hbase.Chore; +import org.apache.hadoop.hbase.Stoppable; +import org.apache.hadoop.hbase.client.RegionReplicaUtil; +import org.apache.hadoop.hbase.master.cleaner.TimeToLiveHFileCleaner; +import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; +import org.apache.hadoop.util.StringUtils; + +/** + * A chore for refreshing the store files for secondary regions hosted in the region server + */ +public class StorefileRefresherChore extends Chore { + + private static final Log LOG = LogFactory.getLog(StorefileRefresherChore.class); + + /** + * The period (in milliseconds) for refreshing the store files for the secondary regions. 
+ */ + static final String REGIONSERVER_STOREFILE_REFRESH_PERIOD + = "hbase.regionserver.storefile.refresh.period"; + static final int DEFAULT_REGIONSERVER_STOREFILE_REFRESH_PERIOD = 0; //disabled by default + + private HRegionServer regionServer; + private long hfileTtl; + private int period; + + //ts of last time regions store files are refreshed + private Map lastRefreshTimes; // encodedName -> long + + public StorefileRefresherChore(int period, HRegionServer regionServer, Stoppable stoppable) { + super("StorefileRefresherChore", period, stoppable); + this.period = period; + this.regionServer = regionServer; + this.hfileTtl = this.regionServer.getConfiguration().getLong( + TimeToLiveHFileCleaner.TTL_CONF_KEY, TimeToLiveHFileCleaner.DEFAULT_TTL); + if (period > hfileTtl / 2) { + throw new RuntimeException(REGIONSERVER_STOREFILE_REFRESH_PERIOD + + " should be set smaller than half of " + TimeToLiveHFileCleaner.TTL_CONF_KEY); + } + lastRefreshTimes = new HashMap(); + } + + @Override + protected void chore() { + for (HRegion r : regionServer.getOnlineRegionsLocalContext()) { + if (!RegionReplicaUtil.isReadOnly(r.getRegionInfo())) { + // skip checking for this region if it can accept writes + continue; + } + String encodedName = r.getRegionInfo().getEncodedName(); + long time = EnvironmentEdgeManager.currentTimeMillis(); + if (!lastRefreshTimes.containsKey(encodedName)) { + lastRefreshTimes.put(encodedName, time); + } + try { + for (Store store : r.getStores().values()) { + // TODO: some stores might see new data from flush, while others do not which + // MIGHT break atomic edits across column families. We can fix this with setting + // mvcc read numbers that we know every store has seen + store.refreshStoreFiles(); + } + } catch (IOException ex) { + LOG.warn("Exception while trying to refresh store files for region:" + r.getRegionInfo() + + ", exception:" + StringUtils.stringifyException(ex)); + + // Store files have a TTL in the archive directory. 
If we fail to refresh for that long, we stop serving reads + if (isRegionStale(encodedName, time)) { + r.setStale(true); // stop serving reads + } + continue; + } + lastRefreshTimes.put(encodedName, time); + r.setStale(false); + } + + // remove closed regions; Iterator.remove() avoids a ConcurrentModificationException + for (java.util.Iterator<String> it = lastRefreshTimes.keySet().iterator(); it.hasNext();) { + if (regionServer.getFromOnlineRegions(it.next()) == null) { + it.remove(); + } + } + } + + protected boolean isRegionStale(String encodedName, long time) { + long lastRefreshTime = lastRefreshTimes.get(encodedName); + return time - lastRefreshTime > hfileTtl - period; + } +} diff --git hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java index be16e30..67f8197 100644 --- hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java +++ hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java @@ -1770,6 +1770,15 @@ public class HBaseTestingUtility extends HBaseCommonTestingUtility { } } + public void deleteNumericRows(final HTable t, final byte[] f, int startRow, int endRow) throws IOException { + for (int i = startRow; i < endRow; i++) { + byte[] data = Bytes.toBytes(String.valueOf(i)); + Delete delete = new Delete(data); + delete.deleteFamily(f); + t.delete(delete); + } + } + /** * Return the number of rows in the given table. */ diff --git hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java index e518074..c4238b7 100644 --- hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java +++ hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java @@ -136,7 +136,7 @@ import com.google.common.collect.Lists; /** * Basic stand-alone testing of HRegion. 
- * + * * A lot of the meta information for an HRegion now lives inside other HRegions * or in the HBaseMaster, so only basic testing is possible. */ @@ -151,7 +151,7 @@ public class TestHRegion { private static final String COLUMN_FAMILY = "MyCF"; HRegion region = null; - private static HBaseTestingUtility TEST_UTIL; // do not run unit tests in parallel + private static HBaseTestingUtility TEST_UTIL; // do not run unit tests in parallel public static Configuration conf ; private String DIR; private static FileSystem fs; @@ -191,7 +191,7 @@ public class TestHRegion { String getName() { return name.getMethodName(); } - + // //////////////////////////////////////////////////////////////////////////// // New tests that doesn't spin up a mini cluster but rather just test the // individual code pieces in the HRegion. Putting files locally in @@ -1971,7 +1971,7 @@ public class TestHRegion { /** * This method tests https://issues.apache.org/jira/browse/HBASE-2516. - * + * * @throws IOException */ @Test @@ -2540,7 +2540,7 @@ public class TestHRegion { /** * Added for HBASE-5416 - * + * * Here we test scan optimization when only subset of CFs are used in filter * conditions. */ @@ -2609,7 +2609,7 @@ public class TestHRegion { /** * HBASE-5416 - * + * * Test case when scan limits amount of KVs returned on each next() call. */ @Test @@ -2703,7 +2703,7 @@ public class TestHRegion { // //////////////////////////////////////////////////////////////////////////// /** * Splits twice and verifies getting from each of the split regions. - * + * * @throws Exception */ @Test @@ -2835,7 +2835,7 @@ public class TestHRegion { * Flushes the cache in a thread while scanning. The tests verify that the * scan is coherent - e.g. the returned results are always of the same or * later update as the previous results. 
- * + * * @throws IOException * scan / compact * @throws InterruptedException @@ -2957,7 +2957,7 @@ public class TestHRegion { /** * Writes very wide records and scans for the latest every time.. Flushes and * compacts the region every now and then to keep things realistic. - * + * * @throws IOException * by flush / scan / compaction * @throws InterruptedException @@ -3115,7 +3115,7 @@ public class TestHRegion { /** * Writes very wide records and gets the latest row every time.. Flushes and * compacts the region aggressivly to catch issues. - * + * * @throws IOException * by flush / scan / compaction * @throws InterruptedException @@ -3500,7 +3500,7 @@ public class TestHRegion { /** * Testcase to check state of region initialization task set to ABORTED or not * if any exceptions during initialization - * + * * @throws Exception */ @Test @@ -3923,7 +3923,116 @@ public class TestHRegion { region.close(); } + @Test + public void testRegionReplicaSecondary() throws IOException { + // create a primary region, load some data and flush + // create a secondary region, and do a get against that + Path rootDir = new Path(DIR + "testRegionReplicaSecondary"); + + byte[][] families = new byte[][] { + Bytes.toBytes("cf1"), Bytes.toBytes("cf2"), Bytes.toBytes("cf3") + }; + byte[] cq = Bytes.toBytes("cq"); + HTableDescriptor htd = new HTableDescriptor(TableName.valueOf("testRegionReplicaSecondary")); + for (byte[] family : families) { + htd.addFamily(new HColumnDescriptor(family)); + } + + long time = System.currentTimeMillis(); + HRegionInfo primaryHri = new HRegionInfo(htd.getTableName(), + HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW, + false, time, 0); + HRegionInfo secondaryHri = new HRegionInfo(htd.getTableName(), + HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW, + false, time, 1); + + HRegion primaryRegion = null, secondaryRegion = null; + + try { + primaryRegion = HRegion.createHRegion(primaryHri, + rootDir, TEST_UTIL.getConfiguration(), htd); + + // load 
some data + putData(primaryRegion, 0, 1000, cq, families); + + // flush region + primaryRegion.flushcache(); + + // open secondary region + secondaryRegion = HRegion.openHRegion(rootDir, secondaryHri, htd, null, conf); + + verifyData(secondaryRegion, 0, 1000, cq, families); + } finally { + if (primaryRegion != null) { + HRegion.closeHRegion(primaryRegion); + } + if (secondaryRegion != null) { + HRegion.closeHRegion(secondaryRegion); + } + } + } + + @Test + public void testRegionReplicaSecondaryIsReadOnly() throws IOException { + // create a primary region, load some data and flush + // create a secondary region, and do a put against that + Path rootDir = new Path(DIR + "testRegionReplicaSecondary"); + + byte[][] families = new byte[][] { + Bytes.toBytes("cf1"), Bytes.toBytes("cf2"), Bytes.toBytes("cf3") + }; + byte[] cq = Bytes.toBytes("cq"); + HTableDescriptor htd = new HTableDescriptor(TableName.valueOf("testRegionReplicaSecondary")); + for (byte[] family : families) { + htd.addFamily(new HColumnDescriptor(family)); + } + + long time = System.currentTimeMillis(); + HRegionInfo primaryHri = new HRegionInfo(htd.getTableName(), + HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW, + false, time, 0); + HRegionInfo secondaryHri = new HRegionInfo(htd.getTableName(), + HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW, + false, time, 1); + + HRegion primaryRegion = null, secondaryRegion = null; + + try { + primaryRegion = HRegion.createHRegion(primaryHri, + rootDir, TEST_UTIL.getConfiguration(), htd); + + // load some data + putData(primaryRegion, 0, 1000, cq, families); + + // flush region + primaryRegion.flushcache(); + + // open secondary region + secondaryRegion = HRegion.openHRegion(rootDir, secondaryHri, htd, null, conf); + + try { + putData(secondaryRegion, 0, 1000, cq, families); + fail("Should have thrown exception"); + } catch (IOException ex) { + // expected + } + } finally { + if (primaryRegion != null) { + HRegion.closeHRegion(primaryRegion); + } + if 
(secondaryRegion != null) { + HRegion.closeHRegion(secondaryRegion); + } + } + + } + private void putData(int startRow, int numRows, byte[] qf, byte[]... families) throws IOException { + putData(this.region, startRow, numRows, qf, families); + } + + private void putData(HRegion region, + int startRow, int numRows, byte[] qf, byte[]... families) throws IOException { for (int i = startRow; i < startRow + numRows; i++) { Put put = new Put(Bytes.toBytes("" + i)); put.setDurability(Durability.SKIP_WAL); @@ -3966,13 +4075,13 @@ public class TestHRegion { /* * Assert first value in the passed region is firstValue. - * + * * @param r - * + * * @param fs - * + * * @param firstValue - * + * * @throws IOException */ private void assertScan(final HRegion r, final byte[] fs, final byte[] firstValue) diff --git hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionReplicas.java hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionReplicas.java new file mode 100644 index 0000000..0b9bf2b --- /dev/null +++ hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionReplicas.java @@ -0,0 +1,299 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hbase.regionserver; + +import java.io.IOException; + +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.MediumTests; +import org.apache.hadoop.hbase.NotServingRegionException; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.catalog.TestMetaReaderEditor; +import org.apache.hadoop.hbase.client.Get; +import org.apache.hadoop.hbase.client.HTable; +import org.apache.hadoop.hbase.client.Result; +import org.apache.hadoop.hbase.protobuf.ProtobufUtil; +import org.apache.hadoop.hbase.protobuf.RequestConverter; +import org.apache.hadoop.hbase.protobuf.generated.AdminProtos; +import org.apache.hadoop.hbase.protobuf.generated.ClientProtos; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.Threads; +import org.apache.hadoop.hbase.zookeeper.ZKAssign; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +import com.google.protobuf.ServiceException; + +/** + * Tests for region replicas. Sad that we cannot isolate these without bringing up a whole + * cluster. See {@link TestRegionServerNoMaster}. 
+ */ +@Category(MediumTests.class) +public class TestRegionReplicas { + private static final int NB_SERVERS = 1; + private static HTable table; + private static final byte[] row = "TestRegionReplicas".getBytes(); + + private static HRegionInfo hriPrimary; + private static HRegionInfo hriSecondary; + + private static final HBaseTestingUtility HTU = new HBaseTestingUtility(); + private static final byte[] f = HConstants.CATALOG_FAMILY; + + @BeforeClass + public static void before() throws Exception { + HTU.startMiniCluster(NB_SERVERS); + final byte[] tableName = Bytes.toBytes(TestRegionReplicas.class.getSimpleName()); + + // Create table then get the single region for our new table. + table = HTU.createTable(tableName, f); + + hriPrimary = table.getRegionLocation(row, false).getRegionInfo(); + + // mock a secondary region info to open + hriSecondary = new HRegionInfo(hriPrimary.getTable(), hriPrimary.getStartKey(), + hriPrimary.getEndKey(), hriPrimary.isSplit(), hriPrimary.getRegionId(), 1); + + // No master + HTU.getHBaseCluster().getMaster().stopMaster(); + } + + @AfterClass + public static void afterClass() throws Exception { + table.close(); + HTU.shutdownMiniCluster(); + } + + @After + public void after() throws Exception { + // Clean the state if the test failed before cleaning the znode + // It does not manage all bad failures, so if there are multiple failures, only + // the first one should be looked at. 
+ ZKAssign.deleteNodeFailSilent(HTU.getZooKeeperWatcher(), hriPrimary); + } + + private HRegionServer getRS() { + return HTU.getMiniHBaseCluster().getRegionServer(0); + } + + private void openRegion(HRegionInfo hri) throws Exception { + ZKAssign.createNodeOffline(HTU.getZooKeeperWatcher(), hri, getRS().getServerName()); + // first version is '0' + AdminProtos.OpenRegionRequest orr = RequestConverter.buildOpenRegionRequest(hri, 0, null); + AdminProtos.OpenRegionResponse responseOpen = getRS().openRegion(null, orr); + Assert.assertTrue(responseOpen.getOpeningStateCount() == 1); + Assert.assertTrue(responseOpen.getOpeningState(0). + equals(AdminProtos.OpenRegionResponse.RegionOpeningState.OPENED)); + checkRegionIsOpened(hri.getEncodedName()); + } + + private void closeRegion(HRegionInfo hri) throws Exception { + ZKAssign.createNodeClosing(HTU.getZooKeeperWatcher(), hri, getRS().getServerName()); + + AdminProtos.CloseRegionRequest crr = RequestConverter.buildCloseRegionRequest( + hri.getEncodedName(), true); + AdminProtos.CloseRegionResponse responseClose = getRS().closeRegion(null, crr); + Assert.assertTrue(responseClose.getClosed()); + + checkRegionIsClosed(hri.getEncodedName()); + + ZKAssign.deleteClosedNode(HTU.getZooKeeperWatcher(), hri.getEncodedName(), getRS().getServerName()); + } + + private void checkRegionIsOpened(String encodedRegionName) throws Exception { + + while (!getRS().getRegionsInTransitionInRS().isEmpty()) { + Thread.sleep(1); + } + + Assert.assertTrue(getRS().getRegionByEncodedName(encodedRegionName).isAvailable()); + + Assert.assertTrue( + ZKAssign.deleteOpenedNode(HTU.getZooKeeperWatcher(), encodedRegionName, getRS().getServerName())); + } + + + private void checkRegionIsClosed(String encodedRegionName) throws Exception { + + while (!getRS().getRegionsInTransitionInRS().isEmpty()) { + Thread.sleep(1); + } + + try { + Assert.assertFalse(getRS().getRegionByEncodedName(encodedRegionName).isAvailable()); + } catch (NotServingRegionException 
expected) { + // That's how it work: if the region is closed we have an exception. + } + + // We don't delete the znode here, because there is not always a znode. + } + + @Test(timeout = 60000) + public void testOpenRegionReplica() throws Exception { + openRegion(hriSecondary); + try { + //load some data to primary + HTU.loadNumericRows(table, f, 0, 1000); + + // assert that we can read back from primary + Assert.assertEquals(1000, HTU.countRows(table)); + } finally { + HTU.deleteNumericRows(table, f, 0, 1000); + closeRegion(hriSecondary); + } + } + + /** Tests that the meta location is saved for secondary regions */ + @Test(timeout = 60000) + public void testRegionReplicaUpdatesMetaLocation() throws Exception { + openRegion(hriSecondary); + HTable meta = null; + try { + meta = new HTable(HTU.getConfiguration(), TableName.META_TABLE_NAME); + TestMetaReaderEditor.assertMetaLocation(meta, hriPrimary.getRegionName() + , getRS().getServerName(), -1, 1, false); + } finally { + if (meta != null ) meta.close(); + closeRegion(hriSecondary); + } + } + + @Test(timeout = 60000) + public void testRegionReplicaGets() throws Exception { + try { + //load some data to primary + HTU.loadNumericRows(table, f, 0, 1000); + // assert that we can read back from primary + Assert.assertEquals(1000, HTU.countRows(table)); + // flush so that region replica can read + HTU.getHBaseAdmin().flush(table.getTableName()); + + openRegion(hriSecondary); + + // first try directly against region + HRegion region = getRS().getFromOnlineRegions(hriSecondary.getEncodedName()); + assertGet(region, 42, true); + + assertGetRpc(hriSecondary, 42, true); + + } finally { + HTU.deleteNumericRows(table, HConstants.CATALOG_FAMILY, 0, 1000); + closeRegion(hriSecondary); + } + } + + private void assertGet(HRegion region, int value, boolean expect) throws IOException { + byte[] row = Bytes.toBytes(String.valueOf(value)); + Get get = new Get(row); + Result result = region.get(get); + if (expect) { + 
Assert.assertArrayEquals(row, result.getValue(f, null)); + } else { + Assert.assertTrue(result.isEmpty()); + } + } + + // build a mock rpc; when expect is false the get must come back empty + private void assertGetRpc(HRegionInfo info, int value, boolean expect) throws IOException, ServiceException { + byte[] row = Bytes.toBytes(String.valueOf(value)); + Get get = new Get(row); + ClientProtos.GetRequest getReq = RequestConverter.buildGetRequest(info.getRegionName(), get); + ClientProtos.GetResponse getResp = getRS().get(null, getReq); + Result result = ProtobufUtil.toResult(getResp.getResult()); + if (expect) { + Assert.assertArrayEquals(row, result.getValue(f, null)); + } else { + Assert.assertTrue(result.isEmpty()); + } + } + + private void restartRegionServer() throws Exception { + afterClass(); + before(); + } + + @Test(timeout = 300000) + public void testRefreshStoreFiles() throws Exception { + // enable store file refreshing + final int refreshPeriod = 2000; // 2 sec + HTU.getConfiguration().setInt("hbase.hstore.compactionThreshold", 100); + HTU.getConfiguration().setInt(StorefileRefresherChore.REGIONSERVER_STOREFILE_REFRESH_PERIOD, refreshPeriod); + // restart the region server so that it starts the refresher chore + restartRegionServer(); + + try { + openRegion(hriSecondary); + + //load some data to primary + HTU.loadNumericRows(table, f, 0, 1000); + // assert that we can read back from primary + Assert.assertEquals(1000, HTU.countRows(table)); + // flush so that region replica can read + HTU.getHBaseAdmin().flush(table.getTableName()); + + // ensure that chore is run + Threads.sleep(4 * refreshPeriod); + + assertGetRpc(hriSecondary, 42, true); + assertGetRpc(hriSecondary, 1042, false); + + //load some data to primary + HTU.loadNumericRows(table, f, 1000, 1100); + HTU.getHBaseAdmin().flush(table.getTableName()); + + HTU.loadNumericRows(table, f, 2000, 2100); + HTU.getHBaseAdmin().flush(table.getTableName()); + + // ensure that chore is run + Threads.sleep(4 * refreshPeriod); + + assertGetRpc(hriSecondary, 42, true); + assertGetRpc(hriSecondary, 
1042, true); + assertGetRpc(hriSecondary, 2042, true); + + // ensure that we see the 3 store files + HRegion secondaryRegion = getRS().getFromOnlineRegions(hriSecondary.getEncodedName()); + Assert.assertEquals(3, secondaryRegion.getStore(f).getStorefilesCount()); + + // force compaction + HTU.compact(table.getName(), true); + + long wakeUpTime = System.currentTimeMillis() + 4 * refreshPeriod; + while (System.currentTimeMillis() < wakeUpTime) { + assertGetRpc(hriSecondary, 42, true); + assertGetRpc(hriSecondary, 1042, true); + assertGetRpc(hriSecondary, 2042, true); + Threads.sleep(10); + } + + // ensure that we see the compacted file only + Assert.assertEquals(1, secondaryRegion.getStore(f).getStorefilesCount()); + + } finally { + // rows were loaded in [0, 1000), [1000, 1100) and [2000, 2100); delete all of them so the + // shared table is left empty for the other tests + HTU.deleteNumericRows(table, HConstants.CATALOG_FAMILY, 0, 2100); + closeRegion(hriSecondary); + } + } +} diff --git hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStore.java hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStore.java index 793b839..3494b4f 100644 --- hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStore.java +++ hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStore.java @@ -19,6 +19,11 @@ package org.apache.hadoop.hbase.regionserver; +import static org.mockito.Matchers.any; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; + import java.io.IOException; import java.lang.ref.SoftReference; import java.security.PrivilegedExceptionAction; @@ -78,6 +83,8 @@ import org.apache.hadoop.util.Progressable; import org.junit.experimental.categories.Category; import org.mockito.Mockito; +import com.google.common.collect.Lists; + /** * Test class for the Store */ @@ -130,7 +137,7 @@ public class TestStore extends TestCase { } private void init(String methodName) throws IOException { - init(methodName, HBaseConfiguration.create()); + init(methodName, TEST_UTIL.getConfiguration()); 
} private void init(String methodName, Configuration conf) @@ -203,7 +210,7 @@ public class TestStore extends TestCase { int ttl = 4; IncrementingEnvironmentEdge edge = new IncrementingEnvironmentEdge(); EnvironmentEdgeManagerTestHelper.injectEdge(edge); - + Configuration conf = HBaseConfiguration.create(); // Enable the expired store file deletion conf.setBoolean("hbase.store.delete.expired.storefile", true); @@ -258,7 +265,7 @@ public class TestStore extends TestCase { FileSystem fs = FileSystem.get(conf); // Initialize region init(getName(), conf); - + int storeFileNum = 4; for (int i = 1; i <= storeFileNum; i++) { LOG.info("Adding some data for the store file #"+i); @@ -278,12 +285,12 @@ public class TestStore extends TestCase { lowestTimeStampFromFS = getLowestTimeStampFromFS(fs, store.getStorefiles()); assertEquals(lowestTimeStampFromManager, lowestTimeStampFromFS); } - - private static long getLowestTimeStampFromFS(FileSystem fs, + + private static long getLowestTimeStampFromFS(FileSystem fs, final Collection candidates) throws IOException { long minTs = Long.MAX_VALUE; if (candidates.isEmpty()) { - return minTs; + return minTs; } Path[] p = new Path[candidates.size()]; int i = 0; @@ -291,7 +298,7 @@ public class TestStore extends TestCase { p[i] = sf.getPath(); ++i; } - + FileStatus[] stats = fs.listStatus(p); if (stats == null || stats.length == 0) { return minTs; @@ -645,6 +652,7 @@ public class TestStore extends TestCase { conf.setClass("fs.file.impl", FaultyFileSystem.class, FileSystem.class); user.runAs(new PrivilegedExceptionAction() { + @Override public Object run() throws Exception { // Make sure it worked (above is sensitive to caching details in hadoop core) FileSystem fs = FileSystem.get(conf); @@ -705,6 +713,7 @@ public class TestStore extends TestCase { overwrite, bufferSize, replication, blockSize, progress), faultPos); } + @Override public FSDataOutputStream createNonRecursive(Path f, boolean overwrite, int bufferSize, short replication, long 
blockSize, Progressable progress) throws IOException { @@ -880,5 +889,103 @@ public class TestStore extends TestCase { init(this.getName(), conf); assertEquals(DummyStoreEngine.lastCreatedCompactor, this.store.storeEngine.getCompactor()); } + + private void addStoreFile() throws IOException { + StoreFile f = this.store.getStorefiles().iterator().next(); + Path storedir = f.getPath().getParent(); + long seqid = this.store.getMaxSequenceId(true); + Configuration c = TEST_UTIL.getConfiguration(); + FileSystem fs = FileSystem.get(c); + HFileContext fileContext = new HFileContextBuilder().withBlockSize(BLOCKSIZE_SMALL).build(); + StoreFile.Writer w = new StoreFile.WriterBuilder(c, new CacheConfig(c), + fs) + .withOutputDir(storedir) + .withFileContext(fileContext) + .build(); + w.appendMetadata(seqid + 1, false); + w.close(); + LOG.info("Added store file:" + w.getPath()); + } + + private void archiveStoreFile(int index) throws IOException { + Collection files = this.store.getStorefiles(); + StoreFile sf = null; + Iterator it = files.iterator(); + for (int i = 0; i <= index; i++) { + sf = it.next(); + } + store.getRegionFileSystem().removeStoreFiles(store.getColumnFamilyName(), Lists.newArrayList(sf)); + } + + public void testRefreshStoreFiles() throws Exception { + init(this.getName()); + + assertEquals(0, this.store.getStorefilesCount()); + + // add some data, flush + this.store.add(new KeyValue(row, family, qf1, 1, (byte[])null)); + flush(1); + assertEquals(1, this.store.getStorefilesCount()); + + // add one more file + addStoreFile(); + + assertEquals(1, this.store.getStorefilesCount()); + store.refreshStoreFiles(); + assertEquals(2, this.store.getStorefilesCount()); + + // add three more files + addStoreFile(); + addStoreFile(); + addStoreFile(); + + assertEquals(2, this.store.getStorefilesCount()); + store.refreshStoreFiles(); + assertEquals(5, this.store.getStorefilesCount()); + + archiveStoreFile(0); + + assertEquals(5, this.store.getStorefilesCount()); + 
store.refreshStoreFiles(); + assertEquals(4, this.store.getStorefilesCount()); + + archiveStoreFile(0); + archiveStoreFile(1); + archiveStoreFile(2); + + assertEquals(4, this.store.getStorefilesCount()); + store.refreshStoreFiles(); + assertEquals(1, this.store.getStorefilesCount()); + + archiveStoreFile(0); + store.refreshStoreFiles(); + assertEquals(0, this.store.getStorefilesCount()); + } + + @SuppressWarnings("unchecked") + public void testRefreshStoreFilesNotChanged() throws IOException { + init(this.getName()); + + assertEquals(0, this.store.getStorefilesCount()); + + // add some data, flush + this.store.add(new KeyValue(row, family, qf1, 1, (byte[])null)); + flush(1); + // add one more file + addStoreFile(); + + HStore spiedStore = spy(store); + + // call first time after files changed + spiedStore.refreshStoreFiles(); + assertEquals(2, this.store.getStorefilesCount()); + verify(spiedStore, times(1)).replaceStoreFiles(any(Collection.class), any(Collection.class)); + + // call second time + spiedStore.refreshStoreFiles(); + + //ensure that replaceStoreFiles is not called again if files are not refreshed: the total + //number of calls, with any arguments, must still be the single call verified above + verify(spiedStore, times(1)).replaceStoreFiles(any(Collection.class), any(Collection.class)); + } } diff --git hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFileRefresherChore.java hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFileRefresherChore.java new file mode 100644 index 0000000..ab2fa6c --- /dev/null +++ hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFileRefresherChore.java @@ -0,0 +1,209 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.regionserver; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.CellUtil; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HColumnDescriptor; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.SmallTests; +import org.apache.hadoop.hbase.Stoppable; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.Durability; +import org.apache.hadoop.hbase.client.Get; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.client.Result; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.StoppableImplementation; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +@Category(SmallTests.class) +public class TestStoreFileRefresherChore { + + private HBaseTestingUtility TEST_UTIL; + private Path testDir; + + @Before + public void setUp() { + TEST_UTIL = new HBaseTestingUtility(); + testDir = 
TEST_UTIL.getDataTestDir("TestStoreFileRefresherChore"); + } + + private HTableDescriptor getTableDesc(TableName tableName, byte[]... families) { + HTableDescriptor htd = new HTableDescriptor(tableName); + for (byte[] family : families) { + HColumnDescriptor hcd = new HColumnDescriptor(family); + // Set default to be three versions. + hcd.setMaxVersions(Integer.MAX_VALUE); + htd.addFamily(hcd); + } + return htd; + } + + static class FailingHRegionFileSystem extends HRegionFileSystem { + boolean fail = false; + FailingHRegionFileSystem(Configuration conf, FileSystem fs, Path tableDir, HRegionInfo regionInfo) { + super(conf, fs, tableDir, regionInfo); + } + + @Override + public Collection getStoreFiles(String familyName) throws IOException { + if (fail) { + throw new IOException("simulating FS failure"); + } + return super.getStoreFiles(familyName); + } + } + + private HRegion initHRegion(HTableDescriptor htd, byte[] startKey, byte[] stopKey, int replicaId) throws IOException { + Configuration conf = TEST_UTIL.getConfiguration(); + Path tableDir = new Path(testDir, htd.getTableName().getNameAsString()); + + HRegionInfo info = new HRegionInfo(htd.getTableName(), startKey, stopKey, false, 0, replicaId); + + HRegionFileSystem fs = new FailingHRegionFileSystem(conf, tableDir.getFileSystem(conf), tableDir, info); + HRegion region = new HRegion(fs, null, conf, htd, null); + + region.initialize(); + + return region; + } + + private void putData(HRegion region, int startRow, int numRows, byte[] qf, byte[]... families) throws IOException { + for (int i = startRow; i < startRow + numRows; i++) { + Put put = new Put(Bytes.toBytes("" + i)); + put.setDurability(Durability.SKIP_WAL); + for (byte[] family : families) { + put.add(family, qf, null); + } + region.put(put); + } + } + + private void verifyData(HRegion newReg, int startRow, int numRows, byte[] qf, byte[]... 
families) + throws IOException { + for (int i = startRow; i < startRow + numRows; i++) { + byte[] row = Bytes.toBytes("" + i); + Get get = new Get(row); + for (byte[] family : families) { + get.addColumn(family, qf); + } + Result result = newReg.get(get); + Cell[] raw = result.rawCells(); + assertEquals(families.length, result.size()); + for (int j = 0; j < families.length; j++) { + assertTrue(CellUtil.matchingRow(raw[j], row)); + assertTrue(CellUtil.matchingFamily(raw[j], families[j])); + assertTrue(CellUtil.matchingQualifier(raw[j], qf)); + } + } + } + + static class StaleStorefileRefresherChore extends StorefileRefresherChore { + boolean isStale = false; + public StaleStorefileRefresherChore(int period, HRegionServer regionServer, + Stoppable stoppable) { + super(period, regionServer, stoppable); + } + @Override + protected boolean isRegionStale(String encodedName, long time) { + return isStale; + } + } + + @Test (timeout = 60000) + public void testIsStale() throws IOException { + int period = 0; + byte[][] families = new byte[][] {Bytes.toBytes("cf")}; + byte[] qf = Bytes.toBytes("cq"); + + HRegionServer regionServer = mock(HRegionServer.class); + List regions = new ArrayList(); + when(regionServer.getOnlineRegionsLocalContext()).thenReturn(regions); + when(regionServer.getConfiguration()).thenReturn(TEST_UTIL.getConfiguration()); + + HTableDescriptor htd = getTableDesc(TableName.valueOf("testIsStale"), families); + HRegion primary = initHRegion(htd, HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW, 0); + HRegion replica1 = initHRegion(htd, HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW, 1); + regions.add(primary); + regions.add(replica1); + + StaleStorefileRefresherChore chore = new StaleStorefileRefresherChore(period, regionServer, new StoppableImplementation()); + + // write some data to primary and flush + putData(primary, 0, 100, qf, families); + primary.flushcache(); + verifyData(primary, 0, 100, qf, families); + + // the replica has not refreshed yet, so verifyData must fail. Assert.fail() must not be + // called inside the try: its AssertionError would be swallowed by the catch below + boolean replicaReadFailed = false; + try { + verifyData(replica1, 
0, 100, qf, families); + } catch(AssertionError ex) { + replicaReadFailed = true; // expected + } + Assert.assertTrue("should have failed", replicaReadFailed); + chore.chore(); + verifyData(replica1, 0, 100, qf, families); + + // simulate an fs failure where we cannot refresh the store files for the replica + ((FailingHRegionFileSystem)replica1.getRegionFileSystem()).fail = true; + + // write some more data to primary and flush + putData(primary, 100, 100, qf, families); + primary.flushcache(); + verifyData(primary, 0, 200, qf, families); + + chore.chore(); // should not throw ex, but we cannot refresh the store files + + verifyData(replica1, 0, 100, qf, families); + replicaReadFailed = false; + try { + verifyData(replica1, 100, 100, qf, families); + } catch(AssertionError ex) { + replicaReadFailed = true; // expected + } + Assert.assertTrue("should have failed", replicaReadFailed); + + chore.isStale = true; + chore.chore(); //now after this, we cannot read back any value + try { + verifyData(replica1, 0, 100, qf, families); + Assert.fail("should have failed with IOException"); + } catch(IOException ex) { + // expected + } + } +}