From d95e995d1a4cfe501dab16f49a17b4d7744c4de8 Mon Sep 17 00:00:00 2001 From: stack Date: Thu, 10 Sep 2015 13:40:03 -0700 Subject: [PATCH] always stamp --- .../org/apache/hadoop/hbase/regionserver/wal/FSHLog.java | 14 ++++++++++---- .../src/main/java/org/apache/hadoop/hbase/wal/WALKey.java | 8 ++++++-- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/FSHLog.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/FSHLog.java index c551a94..65076fb 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/FSHLog.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/FSHLog.java @@ -1737,11 +1737,16 @@ public class FSHLog implements WAL { try { FSWALEntry entry = truck.unloadFSWALEntryPayload(); // If already an exception, do not try to append. Throw. - if (this.exception != null) throw this.exception; + if (this.exception != null) { + // Stamp the sequence id even though we are failing this append. We need to do this + // to release the latch held in WALKey; otherwise anything waiting on it will just hang. + entry.stampRegionSequenceId(); + throw this.exception; + } append(entry); } catch (Exception e) { // Failed append. Record the exception. Throw it from here on out til new WAL in place - this.exception = new DamagedWALException(e); + this.exception = e; // If append fails, presume any pending syncs will fail too; let all waiting handlers // know of the exception. cleanupOutstandingSyncsOnException(sequence, this.exception); @@ -1883,9 +1888,10 @@ public class FSHLog implements WAL { // Update metrics. postAppend(entry, EnvironmentEdgeManager.currentTime() - start); } catch (Exception e) { - LOG.warn("Could not append. 
Requesting close of WAL", e); + String msg = "Failed appending " + regionSequenceId + ", requesting roll of WAL"; + LOG.warn(msg, e); requestLogRoll(); - throw e; + throw new DamagedWALException(msg, e); } numEntries.incrementAndGet(); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/WALKey.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/WALKey.java index 9b3dede..74284e0 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/WALKey.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/WALKey.java @@ -315,8 +315,12 @@ public class WALKey implements SequenceId, Comparable { */ public long getSequenceId(final long maxWaitForSeqId) throws IOException { // TODO: This implementation waiting on a latch is problematic because if a higher level - // determines we should stop or abort, there is not global list of all these blocked WALKeys - // waiting on a sequence id; they can't be cancelled... interrupted. See getNextSequenceId + // determines we should stop or abort, there is no global list of all these blocked WALKeys + // waiting on a sequence id; they can't be cancelled or interrupted. See getNextSequenceId. + // + // UPDATE: I think we can remove the timeout now that we stamp every WALKey with a sequence id, + // even those whose append failed (previously we did not, so they would just hang). + // St.Ack 20150910 try { if (maxWaitForSeqId < 0) { this.seqNumAssignedLatch.await(); -- 2.2.1