diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java index e23791a..3cda141 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java @@ -151,8 +151,8 @@ import org.apache.hadoop.hbase.regionserver.compactions.CompactionThroughputCont import org.apache.hadoop.hbase.regionserver.compactions.NoLimitCompactionThroughputController; import org.apache.hadoop.hbase.regionserver.wal.HLogKey; import org.apache.hadoop.hbase.regionserver.wal.MetricsWAL; -import org.apache.hadoop.hbase.regionserver.wal.WALActionsListener; import org.apache.hadoop.hbase.regionserver.wal.ReplayHLogKey; +import org.apache.hadoop.hbase.regionserver.wal.WALActionsListener; import org.apache.hadoop.hbase.regionserver.wal.WALEdit; import org.apache.hadoop.hbase.regionserver.wal.WALUtil; import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils; @@ -233,7 +233,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi * file in this sequence id's order; i.e. edit #2 will be in the WAL after edit #1. * Its default value is -1L. This default is used as a marker to indicate * that the region hasn't opened yet. Once it is opened, it is set to the derived - * {@link #openSeqNum}, the largest sequence id of all hfiles opened under this Region. + * #openSeqNum, the largest sequence id of all hfiles opened under this Region. * *
Control of this sequence is handed off to the WAL implementation. It is responsible
* for tagging edits with the correct sequence id since it is responsible for getting the
@@ -2078,7 +2078,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
if (this.memstoreSize.get() <= 0) {
// Take an update lock because am about to change the sequence id and we want the sequence id
// to be at the border of the empty memstore.
- MultiVersionConsistencyControl.WriteEntry w = null;
+ MultiVersionConsistencyControl.WriteEntry writeEntry = null;
this.updatesLock.writeLock().lock();
try {
if (this.memstoreSize.get() <= 0) {
@@ -2089,14 +2089,14 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
// etc.)
// wal can be null replaying edits.
if (wal != null) {
- w = mvcc.beginMemstoreInsert();
+ writeEntry = mvcc.beginMemstoreInsert();
long flushOpSeqId = getNextSequenceId(wal);
FlushResult flushResult = new FlushResultImpl(
FlushResult.Result.CANNOT_FLUSH_MEMSTORE_EMPTY, flushOpSeqId, "Nothing to flush",
writeFlushRequestMarkerToWAL(wal, writeFlushWalMarker));
- w.setWriteNumber(flushOpSeqId);
- mvcc.waitForPreviousTransactionsComplete(w);
- w = null;
+ writeEntry.setWriteNumber(flushOpSeqId);
+ mvcc.waitForPreviousTransactionsComplete(writeEntry);
+ writeEntry = null;
return new PrepareFlushResult(flushResult, myseqid);
} else {
return new PrepareFlushResult(
@@ -2107,8 +2107,8 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
}
} finally {
this.updatesLock.writeLock().unlock();
- if (w != null) {
- mvcc.advanceMemstore(w);
+ if (writeEntry != null) {
+ mvcc.advanceMemstore(writeEntry);
}
}
}
@@ -2131,7 +2131,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
// to do this for a moment. It is quick. We also set the memstore size to zero here before we
// allow updates again so its value will represent the size of the updates received
// during flush
- MultiVersionConsistencyControl.WriteEntry w = null;
+ MultiVersionConsistencyControl.WriteEntry writeEntry = null;
// We have to take an update lock during snapshot, or else a write could end up in both snapshot
// and memstore (makes it difficult to do atomic rows then)
status.setStatus("Obtaining lock to block concurrent updates");
@@ -2163,7 +2163,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
long trxId = 0;
try {
try {
- w = mvcc.beginMemstoreInsert();
+ writeEntry = mvcc.beginMemstoreInsert();
if (wal != null) {
Long earliestUnflushedSequenceIdForTheRegion =
wal.startCacheFlush(encodedRegionName, flushedFamilyNames);
@@ -2236,8 +2236,8 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
try {
wal.sync(); // ensure that flush marker is sync'ed
} catch (IOException ioe) {
- LOG.warn("Unexpected exception while wal.sync(), ignoring. Exception: "
- + StringUtils.stringifyException(ioe));
+ wal.abortCacheFlush(this.getRegionInfo().getEncodedNameAsBytes());
+ throw ioe;
}
}
@@ -2246,14 +2246,14 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
// uncommitted transactions from being written into HFiles.
// We have to block before we start the flush, otherwise keys that
// were removed via a rollbackMemstore could be written to Hfiles.
- w.setWriteNumber(flushOpSeqId);
- mvcc.waitForPreviousTransactionsComplete(w);
+ writeEntry.setWriteNumber(flushOpSeqId);
+ mvcc.waitForPreviousTransactionsComplete(writeEntry);
// set w to null to prevent mvcc.advanceMemstore from being called again inside finally block
- w = null;
+ writeEntry = null;
} finally {
- if (w != null) {
- // in case of failure just mark current w as complete
- mvcc.advanceMemstore(w);
+ if (writeEntry != null) {
+ // in case of failure just mark current writeEntry as complete
+ mvcc.advanceMemstore(writeEntry);
}
}
return new PrepareFlushResult(storeFlushCtxs, committedFiles, storeFlushableSize, startTime, flushOpSeqId,
@@ -2877,7 +2877,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
long currentNonceGroup = HConstants.NO_NONCE, currentNonce = HConstants.NO_NONCE;
WALEdit walEdit = new WALEdit(isInReplay);
- MultiVersionConsistencyControl.WriteEntry w = null;
+ MultiVersionConsistencyControl.WriteEntry writeEntry = null;
long txid = 0;
boolean doRollBackMemstore = false;
boolean locked = false;
@@ -3030,7 +3030,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
// ------------------------------------
// Acquire the latest mvcc number
// ----------------------------------
- w = mvcc.beginMemstoreInsertWithSeqNum(mvccNum);
+ writeEntry = mvcc.beginMemstoreInsertWithSeqNum(mvccNum);
// calling the pre CP hook for batch mutation
if (!isInReplay && coprocessorHost != null) {
@@ -3145,7 +3145,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
txid = this.wal.append(this.htableDescriptor, this.getRegionInfo(), walKey, walEdit,
getSequenceId(), true, memstoreCells);
}
- if(walKey == null){
+ if (walKey == null){
// Append a faked WALEdit in order for SKIP_WAL updates to get mvcc assigned
walKey = this.appendEmptyEdit(this.wal, memstoreCells);
}
@@ -3179,9 +3179,9 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
// ------------------------------------------------------------------
// STEP 8. Advance mvcc. This will make this put visible to scanners and getters.
// ------------------------------------------------------------------
- if (w != null) {
- mvcc.completeMemstoreInsertWithSeqNum(w, walKey);
- w = null;
+ if (writeEntry != null) {
+ mvcc.completeMemstoreInsertWithSeqNum(writeEntry, walKey);
+ writeEntry = null;
}
// ------------------------------------
@@ -3210,9 +3210,9 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
// if the wal sync was unsuccessful, remove keys from memstore
if (doRollBackMemstore) {
rollbackMemstore(memstoreCells);
- }
- if (w != null) {
- mvcc.completeMemstoreInsertWithSeqNum(w, walKey);
+ if (writeEntry != null) mvcc.cancelMemstoreInsert(writeEntry);
+ } else if (writeEntry != null) {
+ mvcc.completeMemstoreInsertWithSeqNum(writeEntry, walKey);
}
if (locked) {
@@ -6743,6 +6743,9 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
processor.postBatchMutate(this);
}
} finally {
+ // TODO: Make this method look like all other methods that are doing append/sync and
+ // memstore rollback such as append and doMiniBatchMutation. Currently it is a little
+ // different. Make them all share same code!
if (!mutations.isEmpty() && !walSyncSuccessful) {
LOG.warn("Wal sync failed. Roll back " + mutations.size() +
" memstore keyvalues for row(s):" + StringUtils.byteToHexString(
@@ -6753,6 +6756,10 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
getStore(cell).rollback(cell);
}
}
+ if (writeEntry != null) {
+ mvcc.cancelMemstoreInsert(writeEntry);
+ writeEntry = null;
+ }
}
// 13. Roll mvcc forward
if (writeEntry != null) {
@@ -6854,7 +6861,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
startRegionOperation(Operation.APPEND);
this.writeRequestsCount.increment();
long mvccNum = 0;
- WriteEntry w = null;
+ WriteEntry writeEntry = null;
WALKey walKey = null;
RowLock rowLock = null;
List To read an WAL, call {@link WALFactory#createReader(org.apache.hadoop.fs.FileSystem,
* org.apache.hadoop.fs.Path)}.
+ *
+ * Failure Semantics
+ * If an exception on append or sync, roll the WAL because the current WAL is now a lame duck;
+ * any more appends or syncs will fail also with the same original exception. If we have made
+ * successful appends to the WAL and we then are unable to sync them, our current semantic is to
+ * return error to the client that the appends failed but also to abort the current context,
+ * usually the hosting server. We need to replay the WALs. TODO: Change this semantic. A roll of
 * WAL may be sufficient as long as we have flagged the client that the append failed. TODO:
+ * replication may pick up these last edits though they have been marked as failed append (Need to
+ * keep our own file lengths, not rely on HDFS).
*/
@InterfaceAudience.Private
public class FSHLog implements WAL {
@@ -386,7 +396,7 @@ public class FSHLog implements WAL {
* WAL Comparator; it compares the timestamp (log filenum), present in the log file name.
* Throws an IllegalArgumentException if used to compare paths from different wals.
*/
- public final ComparatornextWriter.
*
@@ -925,6 +950,7 @@ public class FSHLog implements WAL {
SyncFuture syncFuture = null;
SafePointZigZagLatch zigzagLatch = (this.ringBufferEventHandler == null)?
null: this.ringBufferEventHandler.attainSafePoint();
+ afterCreatingZigZagLatch();
TraceScope scope = Trace.startSpan("FSHFile.replaceWriter");
try {
// Wait on the safe point to be achieved. Send in a sync in case nothing has hit the
@@ -938,9 +964,10 @@ public class FSHLog implements WAL {
syncFuture = zigzagLatch.waitSafePoint(publishSyncOnRingBuffer());
}
} catch (FailedSyncBeforeLogCloseException e) {
+ // If there are unflushed/unsynced entries on close, that is a reason to abort.
if (isUnflushedEntries()) throw e;
// Else, let is pass through to the close.
- LOG.warn("Failed last sync but no outstanding unsync edits so falling through to close; " +
+ LOG.warn("Failed sync but no outstanding unsync'd edits so falling through to close; " +
e.getMessage());
}
@@ -991,8 +1018,19 @@ public class FSHLog implements WAL {
// Let the writer thread go regardless, whether error or not.
if (zigzagLatch != null) {
zigzagLatch.releaseSafePoint();
- // It will be null if we failed our wait on safe point above.
- if (syncFuture != null) blockOnSync(syncFuture);
+ // syncFuture will be null if we failed our wait on safe point above. Otherwise, if
+ // latch was obtained successfully, the sync we threw in either trigger the latch or it
+ // got stamped with an exception because the WAL was damaged and we could not sync. Now
+ // the write pipeline has been opened up again by releasing the safe point, process the
+ // syncFuture we got above. This is probably a noop but it may be stale exception from
+ // when old WAL was in place. Catch it if so.
+ if (syncFuture != null) {
+ try {
+ blockOnSync(syncFuture);
+ } catch (IOException ioe) {
+ if (LOG.isTraceEnabled()) LOG.trace("Stale sync exception", ioe);
+ }
+ }
}
} finally {
scope.close();
@@ -1047,7 +1085,7 @@ public class FSHLog implements WAL {
*/
protected Path computeFilename(final long filenum) {
if (filenum < 0) {
- throw new RuntimeException("wal file number can't be < 0");
+ throw new RuntimeException("WAL file number can't be < 0");
}
String child = logFilePrefix + WAL_FILE_NAME_DELIMITER + filenum + logFileSuffix;
return new Path(fullPathLogDir, child);
@@ -1079,7 +1117,7 @@ public class FSHLog implements WAL {
if (fileName == null) throw new IllegalArgumentException("file name can't be null");
if (!ourFiles.accept(fileName)) {
throw new IllegalArgumentException("The log file " + fileName +
- " doesn't belong to this wal. (" + toString() + ")");
+ " doesn't belong to this WAL. (" + toString() + ")");
}
final String fileNameString = fileName.toString();
String chompedPath = fileNameString.substring(prefixPathStr.length(),
@@ -1170,6 +1208,7 @@ public class FSHLog implements WAL {
* @param clusterIds that have consumed the change
* @return New log key.
*/
+ @SuppressWarnings("deprecation")
protected WALKey makeKey(byte[] encodedRegionName, TableName tableName, long seqnum,
long now, List
- * SyncFutures are immutable but recycled. Call {@link #reset(long, Span)} before use even
+ * SyncFutures are immutable but recycled. Call #reset(long, Span) before use even
* if it the first time, start the sync, then park the 'hitched' thread on a call to
- * {@link #get()}
+ * #get().
*/
@InterfaceAudience.Private
class SyncFuture {
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/WALKey.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/WALKey.java
index 61c7a97..6c580b2 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/WALKey.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/WALKey.java
@@ -31,11 +31,11 @@ import java.util.TreeMap;
import java.util.UUID;
import java.util.concurrent.CountDownLatch;
-
import org.apache.hadoop.hbase.util.ByteStringer;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.exceptions.TimeoutIOException;
import org.apache.hadoop.hbase.HBaseInterfaceAudience;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
@@ -49,6 +49,8 @@ import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import com.google.common.annotations.VisibleForTesting;
import com.google.protobuf.ByteString;
+
+
// imports for things that haven't moved from regionserver.wal yet.
import org.apache.hadoop.hbase.regionserver.wal.CompressionContext;
import org.apache.hadoop.hbase.regionserver.wal.WALCellCodec;
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestFSErrorsExposed.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestFSErrorsExposed.java
index 4e97738..b69f672 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestFSErrorsExposed.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestFSErrorsExposed.java
@@ -41,6 +41,7 @@ import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.MiniHBaseCluster;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.TableName;
@@ -219,7 +220,8 @@ public class TestFSErrorsExposed {
util.getDFSCluster().restartDataNodes();
} finally {
- util.getMiniHBaseCluster().killAll();
+ MiniHBaseCluster cluster = util.getMiniHBaseCluster();
+ if (cluster != null) cluster.killAll();
util.shutdownMiniCluster();
}
}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestFailedAppendAndSync.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestFailedAppendAndSync.java
new file mode 100644
index 0000000..dd5df79
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestFailedAppendAndSync.java
@@ -0,0 +1,255 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver;
+
+
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+import java.io.IOException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.DroppedSnapshotException;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.Server;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Durability;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.regionserver.wal.FSHLog;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.EnvironmentEdgeManagerTestHelper;
+import org.apache.hadoop.hbase.util.Threads;
+import org.apache.hadoop.hbase.wal.WAL;
+import org.apache.hadoop.hbase.wal.WALProvider.Writer;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TestName;
+import org.mockito.Mockito;
+import org.mockito.exceptions.verification.WantedButNotInvoked;
+
+/**
+ * Testing sync/append failures.
+ * Copied from TestHRegion.
+ */
+@Category({MediumTests.class})
+public class TestFailedAppendAndSync {
+ private static final Log LOG = LogFactory.getLog(TestFailedAppendAndSync.class);
+ @Rule public TestName name = new TestName();
+
+ private static final String COLUMN_FAMILY = "MyCF";
+ private static final byte [] COLUMN_FAMILY_BYTES = Bytes.toBytes(COLUMN_FAMILY);
+
+ HRegion region = null;
+ // Do not run unit tests in parallel (unclear why; they do not work when run in parallel -- St.Ack)
+ private static HBaseTestingUtility TEST_UTIL;
+ public static Configuration CONF ;
+ private String dir;
+
+ // Test names
+ protected TableName tableName;
+
+ @Before
+ public void setup() throws IOException {
+ TEST_UTIL = HBaseTestingUtility.createLocalHTU();
+ CONF = TEST_UTIL.getConfiguration();
+ dir = TEST_UTIL.getDataTestDir("TestHRegion").toString();
+ tableName = TableName.valueOf(name.getMethodName());
+ }
+
+ @After
+ public void tearDown() throws Exception {
+ EnvironmentEdgeManagerTestHelper.reset();
+ LOG.info("Cleaning test directory: " + TEST_UTIL.getDataTestDir());
+ TEST_UTIL.cleanupTestDir();
+ }
+
+ String getName() {
+ return name.getMethodName();
+ }
+
+ /**
+ * Reproduce locking up that happens when we get an exceptions appending and syncing.
+ * See HBASE-14317.
+ * First I need to set up some mocks for Server and RegionServerServices. I also need to
+ * set up a dodgy WAL that will throw an exception when we go to append to it.
+ */
+ @Test (timeout=300000)
+ public void testLockupAroundBadAssignSync() throws IOException {
+ // Dodgy WAL. Will throw exceptions when flags set.
+ class DodgyFSLog extends FSHLog {
+ volatile boolean throwSyncException = false;
+ volatile boolean throwAppendException = false;
+
+ public DodgyFSLog(FileSystem fs, Path root, String logDir, Configuration conf)
+ throws IOException {
+ super(fs, root, logDir, conf);
+ }
+
+ @Override
+ protected Writer createWriterInstance(Path path) throws IOException {
+ final Writer w = super.createWriterInstance(path);
+ return new Writer() {
+ @Override
+ public void close() throws IOException {
+ w.close();
+ }
+
+ @Override
+ public void sync() throws IOException {
+ if (throwSyncException) {
+ throw new IOException("FAKE! Failed to replace a bad datanode...");
+ }
+ w.sync();
+ }
+
+ @Override
+ public void append(Entry entry) throws IOException {
+ if (throwAppendException) {
+ throw new IOException("FAKE! Failed to replace a bad datanode...");
+ }
+ w.append(entry);
+ }
+
+ @Override
+ public long getLength() throws IOException {
+ return w.getLength();
+ }
+ };
+ }
+ }
+
+ // Make up mocked server and services.
+ Server server = mock(Server.class);
+ when(server.getConfiguration()).thenReturn(CONF);
+ when(server.isStopped()).thenReturn(false);
+ when(server.isAborted()).thenReturn(false);
+ RegionServerServices services = mock(RegionServerServices.class);
+ // OK. Now I have my mocked up Server and RegionServerServices and my dodgy WAL, go ahead with
+ // the test.
+ FileSystem fs = FileSystem.get(CONF);
+ Path rootDir = new Path(dir + getName());
+ DodgyFSLog dodgyWAL = new DodgyFSLog(fs, rootDir, getName(), CONF);
+ LogRoller logRoller = new LogRoller(server, services);
+ logRoller.addWAL(dodgyWAL);
+ logRoller.start();
+
+ boolean threwOnSync = false;
+ boolean threwOnAppend = false;
+ boolean threwOnBoth = false;
+
+ HRegion region = initHRegion(tableName, null, null, dodgyWAL);
+ try {
+ // Get some random bytes.
+ byte[] value = Bytes.toBytes(getName());
+ try {
+ // First get something into memstore
+ Put put = new Put(value);
+ put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("1"), value);
+ region.put(put);
+ } catch (IOException ioe) {
+ fail();
+ }
+
+ try {
+ dodgyWAL.throwAppendException = true;
+ dodgyWAL.throwSyncException = false;
+ Put put = new Put(value);
+ put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("3"), value);
+ region.put(put);
+ } catch (IOException ioe) {
+ threwOnAppend = true;
+ }
+ // When we get to here.. we should be ok. A new WAL has been put in place. There were no
+ // appends to sync. We should be able to continue.
+
+ try {
+ dodgyWAL.throwAppendException = true;
+ dodgyWAL.throwSyncException = true;
+ Put put = new Put(value);
+ put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("4"), value);
+ region.put(put);
+ } catch (IOException ioe) {
+ threwOnBoth = true;
+ }
+ // Again, all should be good. New WAL and no outstanding unsync'd edits so we should be able
+ // to just continue.
+
+ // So, should be no abort at this stage. Verify.
+ Mockito.verify(server, Mockito.atLeast(0)).
+ abort(Mockito.anyString(), (Throwable)Mockito.anyObject());
+ try {
+ dodgyWAL.throwAppendException = false;
+ dodgyWAL.throwSyncException = true;
+ Put put = new Put(value);
+ put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("2"), value);
+ region.put(put);
+ } catch (IOException ioe) {
+ threwOnSync = true;
+ }
+ // An append in the WAL but the sync failed is a server abort condition. That is our
+ // current semantic. Verify. It takes a while for abort to be called. Just hang here till it
+ // happens. If it doesn't, we'll time out the whole test. That is fine.
+ while (true) {
+ try {
+ Mockito.verify(server, Mockito.atLeast(1)).
+ abort(Mockito.anyString(), (Throwable)Mockito.anyObject());
+ break;
+ } catch (WantedButNotInvoked t) {
+ Threads.sleep(1);
+ }
+ }
+ } finally {
+ // To stop logRoller, its server has to say it is stopped.
+ Mockito.when(server.isStopped()).thenReturn(true);
+ if (logRoller != null) logRoller.interrupt();
+ if (region != null) {
+ try {
+ region.close(true);
+ } catch (DroppedSnapshotException e) {
+ LOG.info("On way out; expected!", e);
+ }
+ }
+ if (dodgyWAL != null) dodgyWAL.close();
+ assertTrue("The regionserver should have thrown an exception", threwOnBoth);
+ assertTrue("The regionserver should have thrown an exception", threwOnAppend);
+ assertTrue("The regionserver should have thrown an exception", threwOnSync);
+ }
+ }
+
+ /**
+ * @return A region on which you must call
+ * {@link HBaseTestingUtility#closeRegionAndWAL(HRegion)} when done.
+ */
+ public HRegion initHRegion(TableName tableName, byte[] startKey, byte[] stopKey, WAL wal)
+ throws IOException {
+ return TEST_UTIL.createLocalHRegion(tableName.getName(), startKey, stopKey,
+ getName(), CONF, false, Durability.SYNC_WAL,
+ wal, COLUMN_FAMILY_BYTES);
+ }
+}
\ No newline at end of file
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java
index 6d3b4b2..5add20e 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java
@@ -33,7 +33,9 @@ import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
-import static org.mockito.Matchers.*;
+import static org.mockito.Matchers.any;
+import static org.mockito.Matchers.anyBoolean;
+import static org.mockito.Matchers.anyLong;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.never;
import static org.mockito.Mockito.spy;
@@ -129,7 +131,13 @@ import org.apache.hadoop.hbase.regionserver.HRegion.RegionScannerImpl;
import org.apache.hadoop.hbase.regionserver.Region.RowLock;
import org.apache.hadoop.hbase.regionserver.TestStore.FaultyFileSystem;
import org.apache.hadoop.hbase.regionserver.handler.FinishRegionRecoveringHandler;
-import org.apache.hadoop.hbase.regionserver.wal.*;
+import org.apache.hadoop.hbase.regionserver.wal.FSHLog;
+import org.apache.hadoop.hbase.regionserver.wal.HLogKey;
+import org.apache.hadoop.hbase.regionserver.wal.MetricsWAL;
+import org.apache.hadoop.hbase.regionserver.wal.MetricsWALSource;
+import org.apache.hadoop.hbase.regionserver.wal.WALActionsListener;
+import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
+import org.apache.hadoop.hbase.regionserver.wal.WALUtil;
import org.apache.hadoop.hbase.security.User;
import org.apache.hadoop.hbase.test.MetricsAssertHelper;
import org.apache.hadoop.hbase.testclassification.MediumTests;
@@ -147,6 +155,7 @@ import org.apache.hadoop.hbase.wal.WAL;
import org.apache.hadoop.hbase.wal.WALFactory;
import org.apache.hadoop.hbase.wal.WALKey;
import org.apache.hadoop.hbase.wal.WALProvider;
+import org.apache.hadoop.hbase.wal.WALProvider.Writer;
import org.apache.hadoop.hbase.wal.WALSplitter;
import org.junit.After;
import org.junit.Assert;
@@ -256,6 +265,8 @@ public class TestHRegion {
HRegion.closeHRegion(region);
}
+
+
/*
* This test is for verifying memstore snapshot size is correctly updated in case of rollback
* See HBASE-10845
@@ -335,7 +346,8 @@ public class TestHRegion {
// save normalCPHost and replaced by mockedCPHost, which will cancel flush requests
RegionCoprocessorHost normalCPHost = region.getCoprocessorHost();
RegionCoprocessorHost mockedCPHost = Mockito.mock(RegionCoprocessorHost.class);
- when(mockedCPHost.preFlush(isA(HStore.class), isA(InternalScanner.class))).thenReturn(null);
+ when(mockedCPHost.preFlush(Mockito.isA(HStore.class), Mockito.isA(InternalScanner.class))).
+ thenReturn(null);
region.setCoprocessorHost(mockedCPHost);
region.put(put);
region.flush(true);
@@ -400,9 +412,18 @@ public class TestHRegion {
} catch (DroppedSnapshotException dse) {
// What we are expecting
region.closing.set(false); // this is needed for the rest of the test to work
+ } catch (Exception e) {
+ // What we are expecting
+ region.closing.set(false); // this is needed for the rest of the test to work
}
// Make it so all writes succeed from here on out
ffs.fault.set(false);
+ // WAL is bad because of above faulty fs. Roll WAL.
+ try {
+ region.getWAL().rollWriter(true);
+ } catch (Exception e) {
+ int x = 0;
+ }
// Check sizes. Should still be the one entry.
Assert.assertEquals(sizeOfOnePut, region.getMemstoreSize());
// Now add two entries so that on this next flush that fails, we can see if we
@@ -418,6 +439,8 @@ public class TestHRegion {
region.flush(true);
// Make sure our memory accounting is right.
Assert.assertEquals(sizeOfOnePut * 2, region.getMemstoreSize());
+ } catch (Exception e) {
+ int x = 0;
} finally {
HRegion.closeHRegion(region);
}
@@ -465,12 +488,13 @@ public class TestHRegion {
// Now try close on top of a failing flush.
region.close();
fail();
- } catch (DroppedSnapshotException dse) {
+ } catch (IOException dse) {
// Expected
LOG.info("Expected DroppedSnapshotException");
} finally {
// Make it so all writes succeed from here on out so can close clean
ffs.fault.set(false);
+ region.getWAL().rollWriter(true);
HRegion.closeHRegion(region);
}
return null;
@@ -898,7 +922,7 @@ public class TestHRegion {
// now verify that the flush markers are written
wal.shutdown();
- WAL.Reader reader = wals.createReader(fs, DefaultWALProvider.getCurrentFileName(wal),
+ WAL.Reader reader = WALFactory.createReader(fs, DefaultWALProvider.getCurrentFileName(wal),
TEST_UTIL.getConfiguration());
try {
List First I need to set up some mocks for Server and RegionServerServices. I also need to
+ * set up a dodgy WAL that will throw an exception when we go to append to it.
+ */
+ @Test (timeout=30000)
+ public void testLockupWhenSyncInMiddleOfZigZagSetup() throws IOException {
+ // A WAL that we can have throw exceptions when a flag is set.
+ class DodgyFSLog extends FSHLog {
+ // Set this when want the WAL to start throwing exceptions.
+ volatile boolean throwException = false;
+
+ // Latch to hold up processing until after another operation has had time to run.
+ CountDownLatch latch = new CountDownLatch(1);
+
+ public DodgyFSLog(FileSystem fs, Path root, String logDir, Configuration conf)
+ throws IOException {
+ super(fs, root, logDir, conf);
+ }
+
+ @Override
+ protected void afterCreatingZigZagLatch() {
+ // If throwException set, then append will throw an exception causing the WAL to be
+ // rolled. We'll come in here. Hold up processing until a sync can get in before
+ // the zigzag has time to complete its setup and get its own sync in. This is what causes
+ // the lock up we've seen in production.
+ if (throwException) {
+ try {
+ LOG.info("LATCHED");
+ this.latch.await();
+ } catch (InterruptedException e) {
+ // Interrupted while awaiting the latch; TODO: restore the interrupt status rather than just printing.
+ e.printStackTrace();
+ }
+ }
+ }
+
+ @Override
+ protected void beforeWaitOnSafePoint() {
+ if (throwException) {
+ LOG.info("COUNTDOWN");
+ // Don't countdown latch until someone waiting on it.
+ while (this.latch.getCount() <= 0) {
+ Threads.sleep(10);
+ }
+ this.latch.countDown();
+ }
+ }
+
+ @Override
+ protected Writer createWriterInstance(Path path) throws IOException {
+ final Writer w = super.createWriterInstance(path);
+ return new Writer() {
+ @Override
+ public void close() throws IOException {
+ w.close();
+ }
+
+ @Override
+ public void sync() throws IOException {
+ if (throwException) {
+ throw new IOException("FAKE! Failed to replace a bad datanode...SYNC");
+ }
+ w.sync();
+ }
+
+ @Override
+ public void append(Entry entry) throws IOException {
+ if (throwException) {
+ throw new IOException("FAKE! Failed to replace a bad datanode...APPEND");
+ }
+ w.append(entry);
+ }
+
+ @Override
+ public long getLength() throws IOException {
+ return w.getLength();
+ }
+ };
+ }
+ }
+
+ // Mocked up server and regionserver services. Needed below.
+ Server server = Mockito.mock(Server.class);
+ Mockito.when(server.getConfiguration()).thenReturn(CONF);
+ Mockito.when(server.isStopped()).thenReturn(false);
+ Mockito.when(server.isAborted()).thenReturn(false);
+ RegionServerServices services = Mockito.mock(RegionServerServices.class);
+
+ // OK. Now I have my mocked up Server & RegionServerServices and dodgy WAL, go ahead with test.
+ FileSystem fs = FileSystem.get(CONF);
+ Path rootDir = new Path(dir + getName());
+ DodgyFSLog dodgyWAL = new DodgyFSLog(fs, rootDir, getName(), CONF);
+ Path originalWAL = dodgyWAL.getCurrentFileName();
+ // I need a log roller running.
+ LogRoller logRoller = new LogRoller(server, services);
+ logRoller.addWAL(dodgyWAL);
+ // There is no 'stop' once a logRoller is running.. it just dies.
+ logRoller.start();
+ // Now get a region and start adding in edits.
+ HTableDescriptor htd = new HTableDescriptor(TableName.META_TABLE_NAME);
+ final HRegion region = initHRegion(tableName, null, null, dodgyWAL);
+ byte [] bytes = Bytes.toBytes(getName());
+ try {
+ // First get something into memstore. Make a Put and then pull the Cell out of it. Will
+ // manage append and sync carefully in below to manufacture hang. We keep adding same
+ // edit. WAL subsystem doesn't care.
+ Put put = new Put(bytes);
+ put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("1"), bytes);
+ WALKey key = new WALKey(region.getRegionInfo().getEncodedNameAsBytes(), htd.getTableName());
+ WALEdit edit = new WALEdit();
+ List