commit c4451617423a37eee88e3e3a09967f5714858beb Author: stack Date: Mon Aug 31 22:30:53 2015 -0700 Repro diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java index 6741957..583a1e7 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java @@ -302,7 +302,8 @@ public class HFile { try { ostream.setDropBehind(shouldDropBehind && cacheConf.shouldDropBehindCompaction()); } catch (UnsupportedOperationException uoe) { - LOG.debug("Unable to set drop behind on " + path, uoe); + if (LOG.isDebugEnabled()) LOG.debug("Unable to set drop behind on " + path); + else if (LOG.isTraceEnabled()) LOG.trace("Unable to set drop behind on " + path, uoe); } } return createWriter(fs, path, ostream, diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java index 312c1ac..b484f33 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java @@ -202,12 +202,6 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi public static final String LOAD_CFS_ON_DEMAND_CONFIG_KEY = "hbase.hregion.scan.loadColumnFamiliesOnDemand"; - // in milliseconds - private static final String MAX_WAIT_FOR_SEQ_ID_KEY = - "hbase.hregion.max.wait.for.seq.id"; - - private static final int DEFAULT_MAX_WAIT_FOR_SEQ_ID = 60000; - /** * This is the global default value for durability. All tables/mutations not * defining a durability or using USE_DEFAULT will default to this value. @@ -239,7 +233,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi * file in this sequence id's order; i.e. edit #2 will be in the WAL after edit #1. 
* Its default value is -1L. This default is used as a marker to indicate * that the region hasn't opened yet. Once it is opened, it is set to the derived - * {@link #openSeqNum}, the largest sequence id of all hfiles opened under this Region. + * #openSeqNum, the largest sequence id of all hfiles opened under this Region. * *

Control of this sequence is handed off to the WAL implementation. It is responsible * for tagging edits with the correct sequence id since it is responsible for getting the @@ -339,7 +333,6 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi */ private boolean isLoadingCfsOnDemandDefault = false; - private int maxWaitForSeqId; private final AtomicInteger majorInProgress = new AtomicInteger(0); private final AtomicInteger minorInProgress = new AtomicInteger(0); @@ -675,7 +668,6 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi this.rowLockWaitDuration = conf.getInt("hbase.rowlock.wait.duration", DEFAULT_ROWLOCK_WAIT_DURATION); - maxWaitForSeqId = conf.getInt(MAX_WAIT_FOR_SEQ_ID_KEY, DEFAULT_MAX_WAIT_FOR_SEQ_ID); this.isLoadingCfsOnDemandDefault = conf.getBoolean(LOAD_CFS_ON_DEMAND_CONFIG_KEY, true); this.htableDescriptor = htd; this.rsServices = rsServices; @@ -2436,7 +2428,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi @VisibleForTesting protected long getNextSequenceId(final WAL wal) throws IOException { WALKey key = this.appendEmptyEdit(wal, null); - return key.getSequenceId(maxWaitForSeqId); + return key.getSequenceId(); } ////////////////////////////////////////////////////////////////////////////// @@ -7403,7 +7395,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi public static final long FIXED_OVERHEAD = ClassSize.align( ClassSize.OBJECT + ClassSize.ARRAY + - 45 * ClassSize.REFERENCE + 3 * Bytes.SIZEOF_INT + + 45 * ClassSize.REFERENCE + 2 * Bytes.SIZEOF_INT + (14 * Bytes.SIZEOF_LONG) + 5 * Bytes.SIZEOF_BOOLEAN); @@ -8055,13 +8047,20 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi * @throws IOException */ private WALKey appendEmptyEdit(final WAL wal, List cells) throws IOException { + return appendEmptyEdit(wal, getTableDesc(), getRegionInfo(), this.sequenceId, cells); + } + + 
@VisibleForTesting + public static WALKey appendEmptyEdit(final WAL wal, final HTableDescriptor htd, + final HRegionInfo hri, final AtomicLong sequenceId, final List cells) + throws IOException { // we use HLogKey here instead of WALKey directly to support legacy coprocessors. - WALKey key = new HLogKey(getRegionInfo().getEncodedNameAsBytes(), getRegionInfo().getTable(), + WALKey key = new HLogKey(hri.getEncodedNameAsBytes(), hri.getTable(), WALKey.NO_SEQUENCE_ID, 0, null, HConstants.NO_NONCE, HConstants.NO_NONCE); // Call append but with an empty WALEdit. The returned seqeunce id will not be associated // with any edit and we can be sure it went in after all outstanding appends. - wal.append(getTableDesc(), getRegionInfo(), key, - WALEdit.EMPTY_WALEDIT, this.sequenceId, false, cells); + wal.append(htd, hri, key, + WALEdit.EMPTY_WALEDIT, sequenceId, false, cells); return key; } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/LogRoller.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/LogRoller.java index 42621c9..aa62f7c 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/LogRoller.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/LogRoller.java @@ -26,19 +26,15 @@ import java.util.concurrent.locks.ReentrantLock; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.RemoteExceptionHandler; import org.apache.hadoop.hbase.Server; +import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException; -import org.apache.hadoop.hbase.wal.WAL; import org.apache.hadoop.hbase.regionserver.wal.WALActionsListener; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.HasThread; - -import java.io.IOException; -import 
java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.locks.ReentrantLock; +import org.apache.hadoop.hbase.wal.WAL; import com.google.common.annotations.VisibleForTesting; @@ -113,7 +109,9 @@ public class LogRoller extends HasThread { if (!periodic) { synchronized (rollLog) { try { - if (!rollLog.get()) rollLog.wait(this.threadWakeFrequency); + if (!rollLog.get()) { + rollLog.wait(this.threadWakeFrequency); + } } catch (InterruptedException e) { // Fall through } @@ -184,5 +182,4 @@ public class LogRoller extends HasThread { requester); } } - -} +} \ No newline at end of file diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/FSHLog.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/FSHLog.java index fa69d63..c94febb 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/FSHLog.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/FSHLog.java @@ -758,6 +758,12 @@ public class FSHLog implements WAL { } /** + * Used to manufacture race condition reliably. For testing only. + */ + @VisibleForTesting + protected void afterZigZagLatch() {} + + /** * Cleans up current writer closing it and then puts in place the passed in * nextWriter. * @@ -786,6 +792,7 @@ public class FSHLog implements WAL { SyncFuture syncFuture = null; SafePointZigZagLatch zigzagLatch = (this.ringBufferEventHandler == null)? null: this.ringBufferEventHandler.attainSafePoint(); + afterZigZagLatch(); TraceScope scope = Trace.startSpan("FSHFile.replaceWriter"); try { // Wait on the safe point to be achieved. Send in a sync in case nothing has hit the @@ -1065,6 +1072,12 @@ public class FSHLog implements WAL { return sequence; } + /** + * Hook called just before the writer is synced; a no-op here. For testing so we can mess up syncs. + */ + @VisibleForTesting + protected void presync() throws IOException {} + /** * Thread to runs the hdfs sync call. This call takes a while to complete. 
This is the longest * pole adding edits to the WAL and this must complete to be sure all edits persisted. We run @@ -1203,6 +1216,7 @@ public class FSHLog implements WAL { Throwable t = null; try { Trace.addTimelineAnnotation("syncing writer"); + presync(); writer.sync(); Trace.addTimelineAnnotation("writer synced"); currentSequence = updateHighestSyncedSequence(currentSequence); @@ -1696,6 +1710,7 @@ public class FSHLog implements WAL { } private void cleanupOutstandingSyncsOnException(final long sequence, final Exception e) { + // This is a bit crazy. There could be handler-count syncFutures outstanding. for (int i = 0; i < this.syncFuturesCount; i++) this.syncFutures[i].done(sequence, e); this.syncFuturesCount = 0; } @@ -1748,9 +1763,7 @@ public class FSHLog implements WAL { } // Below expects that the offer 'transfers' responsibility for the outstanding syncs to the - // syncRunner. We should never get an exception in here. HBASE-11145 was because queue - // was sized exactly to the count of user handlers but we could have more if we factor in - // meta handlers doing opens and closes. + // syncRunner. We should never get an exception in here. int index = Math.abs(this.syncRunnerIndex++) % this.syncRunners.length; try { this.syncRunners[index].offer(sequence, this.syncFutures, this.syncFuturesCount); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/SyncFuture.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/SyncFuture.java index 62ab458..7de8367 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/SyncFuture.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/SyncFuture.java @@ -42,9 +42,9 @@ import org.apache.htrace.Span; * SyncFutures as done; i.e. we batch up sync calls rather than do a dfs sync * call every time a Handler asks for it. *

- * SyncFutures are immutable but recycled. Call {@link #reset(long, Span)} before use even + * SyncFutures are immutable but recycled. Call #reset(long, Span) before use even * if it the first time, start the sync, then park the 'hitched' thread on a call to - * {@link #get()} + * #get(). */ @InterfaceAudience.Private class SyncFuture { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/WALKey.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/WALKey.java index 69c2aec..e8056e4 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/WALKey.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/WALKey.java @@ -30,7 +30,6 @@ import java.util.NavigableMap; import java.util.TreeMap; import java.util.UUID; import java.util.concurrent.CountDownLatch; -import java.util.concurrent.TimeUnit; import org.apache.hadoop.hbase.util.ByteStringer; @@ -302,24 +301,8 @@ public class WALKey implements SequenceId, Comparable { */ @Override public long getSequenceId() throws IOException { - return getSequenceId(-1); - } - - /** - * Wait for sequence number is assigned & return the assigned value - * @param maxWaitForSeqId maximum duration, in milliseconds, to wait for seq number to be assigned - * @return long the new assigned sequence number - * @throws IOException - */ - public long getSequenceId(int maxWaitForSeqId) throws IOException { try { - if (maxWaitForSeqId < 0) { - this.seqNumAssignedLatch.await(); - } else { - if (!this.seqNumAssignedLatch.await(maxWaitForSeqId, TimeUnit.MILLISECONDS)) { - throw new IOException("Timed out waiting for seq number to be assigned"); - } - } + this.seqNumAssignedLatch.await(); } catch (InterruptedException ie) { LOG.warn("Thread interrupted waiting for next log sequence number"); InterruptedIOException iie = new InterruptedIOException(); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java index cb2203a..2a6974a 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java @@ -46,6 +46,7 @@ import static org.mockito.Mockito.when; import java.io.IOException; import java.io.InterruptedIOException; +import java.net.InetSocketAddress; import java.security.PrivilegedExceptionAction; import java.util.ArrayList; import java.util.Arrays; @@ -55,8 +56,11 @@ import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.NavigableMap; +import java.util.Set; import java.util.TreeMap; import java.util.UUID; +import java.util.concurrent.ConcurrentMap; +import java.util.concurrent.CountDownLatch; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; @@ -72,8 +76,11 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.CellComparator; +import org.apache.hadoop.hbase.CellScanner; import org.apache.hadoop.hbase.CellUtil; +import org.apache.hadoop.hbase.ChoreService; import org.apache.hadoop.hbase.CompatibilitySingletonFactory; +import org.apache.hadoop.hbase.CoordinatedStateManager; import org.apache.hadoop.hbase.DroppedSnapshotException; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HBaseTestCase; @@ -91,12 +98,14 @@ import org.apache.hadoop.hbase.MultithreadedTestUtil.RepeatingTestThread; import org.apache.hadoop.hbase.MultithreadedTestUtil.TestThread; import org.apache.hadoop.hbase.NotServingRegionException; import org.apache.hadoop.hbase.RegionTooBusyException; +import org.apache.hadoop.hbase.Server; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.Tag; import 
org.apache.hadoop.hbase.TagType; import org.apache.hadoop.hbase.Waiter; import org.apache.hadoop.hbase.client.Append; +import org.apache.hadoop.hbase.client.ClusterConnection; import org.apache.hadoop.hbase.client.Delete; import org.apache.hadoop.hbase.client.Durability; import org.apache.hadoop.hbase.client.Get; @@ -107,6 +116,7 @@ import org.apache.hadoop.hbase.client.RowMutations; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.client.Table; import org.apache.hadoop.hbase.exceptions.FailedSanityCheckException; +import org.apache.hadoop.hbase.executor.ExecutorService; import org.apache.hadoop.hbase.filter.BinaryComparator; import org.apache.hadoop.hbase.filter.ColumnCountGetFilter; import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp; @@ -118,20 +128,25 @@ import org.apache.hadoop.hbase.filter.PrefixFilter; import org.apache.hadoop.hbase.filter.SingleColumnValueExcludeFilter; import org.apache.hadoop.hbase.filter.SingleColumnValueFilter; import org.apache.hadoop.hbase.io.hfile.HFile; +import org.apache.hadoop.hbase.ipc.RpcServerInterface; +import org.apache.hadoop.hbase.master.TableLockManager; import org.apache.hadoop.hbase.monitoring.MonitoredRPCHandler; import org.apache.hadoop.hbase.monitoring.MonitoredTask; import org.apache.hadoop.hbase.monitoring.TaskMonitor; import org.apache.hadoop.hbase.protobuf.ProtobufUtil; +import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode; import org.apache.hadoop.hbase.protobuf.generated.WALProtos.CompactionDescriptor; import org.apache.hadoop.hbase.protobuf.generated.WALProtos.FlushDescriptor; import org.apache.hadoop.hbase.protobuf.generated.WALProtos.FlushDescriptor.FlushAction; import org.apache.hadoop.hbase.protobuf.generated.WALProtos.FlushDescriptor.StoreFlushDescriptor; import org.apache.hadoop.hbase.protobuf.generated.WALProtos.RegionEventDescriptor; import org.apache.hadoop.hbase.protobuf.generated.WALProtos.StoreDescriptor; 
+import org.apache.hadoop.hbase.quotas.RegionServerQuotaManager; import org.apache.hadoop.hbase.regionserver.HRegion.RegionScannerImpl; import org.apache.hadoop.hbase.regionserver.Region.RowLock; import org.apache.hadoop.hbase.regionserver.TestStore.FaultyFileSystem; import org.apache.hadoop.hbase.regionserver.handler.FinishRegionRecoveringHandler; +import org.apache.hadoop.hbase.regionserver.wal.FSHLog; import org.apache.hadoop.hbase.regionserver.wal.HLogKey; import org.apache.hadoop.hbase.regionserver.wal.MetricsWAL; import org.apache.hadoop.hbase.regionserver.wal.MetricsWALSource; @@ -155,7 +170,11 @@ import org.apache.hadoop.hbase.wal.WAL; import org.apache.hadoop.hbase.wal.WALFactory; import org.apache.hadoop.hbase.wal.WALKey; import org.apache.hadoop.hbase.wal.WALProvider; +import org.apache.hadoop.hbase.wal.WALProvider.Writer; import org.apache.hadoop.hbase.wal.WALSplitter; +import org.apache.hadoop.hbase.zookeeper.MetaTableLocator; +import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher; +import org.apache.zookeeper.KeeperException; import org.junit.After; import org.junit.Assert; import org.junit.Before; @@ -171,6 +190,7 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.protobuf.ByteString; +import com.google.protobuf.Service; /** * Basic stand-alone testing of HRegion. No clusters! @@ -233,6 +253,462 @@ public class TestHRegion { } /** + * Extremely minimal mock Server. Does nothing but return a Configuration. 
+ */ + private class ExtremelyMinimalMockServer implements Server { + @Override + public void abort(String why, Throwable e) { + // TODO Auto-generated method stub + } + + @Override + public boolean isAborted() { + // TODO Auto-generated method stub + return false; + } + + @Override + public void stop(String why) { + // TODO Auto-generated method stub + } + + @Override + public boolean isStopped() { + // TODO Auto-generated method stub + return false; + } + + @Override + public Configuration getConfiguration() { + return TEST_UTIL.getConfiguration(); + } + + @Override + public ZooKeeperWatcher getZooKeeper() { + // TODO Auto-generated method stub + return null; + } + + @Override + public ClusterConnection getConnection() { + // TODO Auto-generated method stub + return null; + } + + @Override + public MetaTableLocator getMetaTableLocator() { + // TODO Auto-generated method stub + return null; + } + + @Override + public ServerName getServerName() { + // TODO Auto-generated method stub + return null; + } + + @Override + public CoordinatedStateManager getCoordinatedStateManager() { + // TODO Auto-generated method stub + return null; + } + + @Override + public ChoreService getChoreService() { + // TODO Auto-generated method stub + return null; + } + } + + /** + * Extremely minimal mock regionserver services. Made by the IDE. No customization. All defaults. + * Place holder only. 
+ */ + private class AllDefaultsMockRegionServerServices implements RegionServerServices { + @Override + public void addToOnlineRegions(Region r) { + // TODO Auto-generated method stub + } + + @Override + public boolean removeFromOnlineRegions(Region r, ServerName destination) { + // TODO Auto-generated method stub + return false; + } + + @Override + public Region getFromOnlineRegions(String encodedRegionName) { + // TODO Auto-generated method stub + return null; + } + + @Override + public List getOnlineRegions(TableName tableName) throws IOException { + // TODO Auto-generated method stub + return null; + } + + @Override + public Configuration getConfiguration() { + return null; + } + + @Override + public ZooKeeperWatcher getZooKeeper() { + // TODO Auto-generated method stub + return null; + } + + @Override + public ClusterConnection getConnection() { + // TODO Auto-generated method stub + return null; + } + + @Override + public MetaTableLocator getMetaTableLocator() { + // TODO Auto-generated method stub + return null; + } + + @Override + public ServerName getServerName() { + // TODO Auto-generated method stub + return null; + } + + @Override + public CoordinatedStateManager getCoordinatedStateManager() { + // TODO Auto-generated method stub + return null; + } + + @Override + public ChoreService getChoreService() { + // TODO Auto-generated method stub + return null; + } + + @Override + public void abort(String why, Throwable e) { + // TODO Auto-generated method stub + } + + @Override + public boolean isAborted() { + // TODO Auto-generated method stub + return false; + } + + @Override + public void stop(String why) { + // TODO Auto-generated method stub + } + + @Override + public boolean isStopped() { + // TODO Auto-generated method stub + return false; + } + + @Override + public void updateRegionFavoredNodesMapping(String encodedRegionName, + List favoredNodes) { + // TODO Auto-generated method stub + } + + @Override + public InetSocketAddress[] 
getFavoredNodesForRegion(String encodedRegionName) { + // TODO Auto-generated method stub + return null; + } + + @Override + public boolean isStopping() { + // TODO Auto-generated method stub + return false; + } + + @Override + public WAL getWAL(HRegionInfo regionInfo) throws IOException { + // TODO Auto-generated method stub + return null; + } + + @Override + public CompactionRequestor getCompactionRequester() { + // TODO Auto-generated method stub + return null; + } + + @Override + public FlushRequester getFlushRequester() { + // TODO Auto-generated method stub + return null; + } + + @Override + public RegionServerAccounting getRegionServerAccounting() { + // TODO Auto-generated method stub + return null; + } + + @Override + public TableLockManager getTableLockManager() { + // TODO Auto-generated method stub + return null; + } + + @Override + public RegionServerQuotaManager getRegionServerQuotaManager() { + // TODO Auto-generated method stub + return null; + } + + @Override + public void postOpenDeployTasks(PostOpenDeployContext context) + throws KeeperException, IOException { + // TODO Auto-generated method stub + } + + @Override + public void postOpenDeployTasks(Region r) throws KeeperException, IOException { + // TODO Auto-generated method stub + } + + @Override + public boolean reportRegionStateTransition(RegionStateTransitionContext context) { + // TODO Auto-generated method stub + return false; + } + + @Override + public boolean reportRegionStateTransition(TransitionCode code, + long openSeqNum, HRegionInfo... hris) { + // TODO Auto-generated method stub + return false; + } + + @Override + public boolean reportRegionStateTransition(TransitionCode code, HRegionInfo... 
hris) { + // TODO Auto-generated method stub + return false; + } + + @Override + public RpcServerInterface getRpcServer() { + // TODO Auto-generated method stub + return null; + } + + @Override + public ConcurrentMap getRegionsInTransitionInRS() { + // TODO Auto-generated method stub + return null; + } + + @Override + public FileSystem getFileSystem() { + // TODO Auto-generated method stub + return null; + } + + @Override + public Leases getLeases() { + // TODO Auto-generated method stub + return null; + } + + @Override + public ExecutorService getExecutorService() { + // TODO Auto-generated method stub + return null; + } + + @Override + public Map getRecoveringRegions() { + // TODO Auto-generated method stub + return null; + } + + @Override + public ServerNonceManager getNonceManager() { + // TODO Auto-generated method stub + return null; + } + + @Override + public boolean registerService(Service service) { + // TODO Auto-generated method stub + return false; + } + + @Override + public HeapMemoryManager getHeapMemoryManager() { + // TODO Auto-generated method stub + return null; + } + + @Override + public double getCompactionPressure() { + // TODO Auto-generated method stub + return 0; + } + + @Override + public Set getOnlineTables() { + // TODO Auto-generated method stub + return null; + } + } + + /** + * Reproduce locking up that happens when we get an IOE out of an append. See HBASE-14317. + * First I need to set up some fat mocks for Server and RegionServerServices. I also need to + * set up a WAL that will throw an exception when we go to append to it. + */ + @Test (timeout=60000) + public void testLockedUpWALSystem() throws IOException { + // A WAL that we can have throw exceptions when a flag is set. + class DodgyFSLog extends FSHLog { + /** + * Set this when want the WAL to start throwing exceptions. + */ + volatile boolean throwException = false; + + /** + * Latch to hold up processing a while until after another operation has had time to run. 
+ */ + CountDownLatch latch = new CountDownLatch(1); + + public DodgyFSLog(FileSystem fs, Path root, String logDir, Configuration conf) + throws IOException { + super(fs, root, logDir, conf); + } + + @Override + protected void afterZigZagLatch() { + // If throwException set, then append will throw an exception causing the WAL to be + // rolled. We'll come in here. Hold up processing until a sync can get in before + // the zigzag has time to finish setup and get its own sync in. + if (throwException) + try { + this.latch.await(); + } catch (InterruptedException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + } + + @Override + protected Writer createWriterInstance(Path path) throws IOException { + final Writer w = super.createWriterInstance(path); + return new Writer() { + @Override + public void close() throws IOException { + w.close(); + } + + @Override + public void sync() throws IOException { + // This sync happens out on + if (throwException) { + if (throwException) { + latch.countDown(); + // Threads.sleep(1000); + throw new IOException("FAKE! Failed to replace a bad datanode...SYNC!"); + } + } + w.sync(); + } + + @Override + public void append(Entry entry) throws IOException { + if (throwException) throw new IOException("FAKE! Failed to replace a bad datanode..."); + w.append(entry); + } + + @Override + public long getLength() throws IOException { + return w.getLength(); + } + }; + } + } + + // OK. Now I have my mocked up Server and RegionServerServices and my dodgy WAL, go ahead with + // the test. + FileSystem fs = FileSystem.get(CONF); + Path rootDir = new Path(dir + "testLockedUpWALSystem"); + DodgyFSLog dodgyWAL = new DodgyFSLog(fs, rootDir, "testLockedUpWALSystem", CONF); + // I need a log roller running. + LogRoller logRoller = new LogRoller(new ExtremelyMinimalMockServer(), + new AllDefaultsMockRegionServerServices()); + logRoller.addWAL(dodgyWAL); + logRoller.start(); + // Now start adding in edits. 
+ HTableDescriptor htd = new HTableDescriptor(TableName.META_TABLE_NAME); + final HRegion region = initHRegion(tableName, null, null, name.getMethodName(), + CONF, false, Durability.SYNC_WAL, dodgyWAL, COLUMN_FAMILY_BYTES); + byte [] bytes = Bytes.toBytes(name.getMethodName()); + try { + boolean threwIOE = false; + try { + // First get something into memstore. Make a Put and then pull the Cell out of it. Will + // manage append and sync carefully in below to manufacture hang. We keep adding same + // edit. WAL subsystem doesn't care. + Put put = new Put(bytes); + put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("1"), bytes); + WALKey key = new WALKey(region.getRegionInfo().getEncodedNameAsBytes(), htd.getTableName()); + WALEdit edit = new WALEdit(); + List cells = new ArrayList(); + for (CellScanner cs = put.cellScanner(); cs.advance();) { + edit.add(cs.current()); + cells.add(cs.current()); + } + // Put something in memstore and out in the WAL. Do a big number of appends so we push + // out other side of the ringbuffer. If small numbers, we just hang around in the ring + // buffer. + for (int i = 0; i < 1000; i++) { + dodgyWAL.append(htd, region.getRegionInfo(), key, edit, region.sequenceId, true, cells); + } + // Set it so we start throwing exceptions. + dodgyWAL.throwException = true; + // This append provokes a WAL roll. + dodgyWAL.append(htd, region.getRegionInfo(), key, edit, region.sequenceId, true, cells); + try { + dodgyWAL.sync(); + } catch (Exception e) { + LOG.info("Caught exception", e); + } + // Get a memstore flush going too so we have same hung profile as up in the issue over + // in HBASE-14317. + Thread t = new Thread ("flusher") { + public void run() { + try { + region.flush(false); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + }; + }; + t.setDaemon(true); + t.start(); + // Wait till it gets into flushing. It will get stuck on getSequenceId. 
+ while (!region.writestate.flushing) Threads.sleep(1); + } catch (IOException ioe) { + threwIOE = true; + } finally { + // FIX assertTrue("The regionserver should have thrown an exception", threwIOE); + } + } finally { + if (region != null) region.close(); + if (dodgyWAL != null) dodgyWAL.close(); + } + } + + /** * Test that I can use the max flushed sequence id after the close. * @throws IOException */ @@ -293,6 +769,8 @@ public class TestHRegion { HRegion.closeHRegion(region); } + + /* * This test is for verifying memstore snapshot size is correctly updated in case of rollback * See HBASE-10845 @@ -893,7 +1371,7 @@ public class TestHRegion { // now verify that the flush markers are written wal.shutdown(); - WAL.Reader reader = wals.createReader(fs, DefaultWALProvider.getCurrentFileName(wal), + WAL.Reader reader = WALFactory.createReader(fs, DefaultWALProvider.getCurrentFileName(wal), TEST_UTIL.getConfiguration()); try { List flushDescriptors = new ArrayList(); @@ -5663,7 +6141,6 @@ public class TestHRegion { putData(startRow, numRows, qualifier, families); int splitRow = startRow + numRows; putData(splitRow, numRows, qualifier, families); - int endRow = splitRow + numRows; region.flush(true); HRegion [] regions = null;