Index: hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestLogRolling.java =================================================================== --- hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestLogRolling.java (revision 1526237) +++ hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestLogRolling.java (working copy) @@ -396,7 +396,7 @@ // the configured value 2. assertTrue(dfsCluster.stopDataNode(pipeline[1].getName()) != null); - batchWriteAndWait(table, 3, false, 10000); + batchWriteAndWait(table, 3, false, 14000); assertTrue("LowReplication Roller should've been disabled, current replication=" + ((FSHLog) log).getLogReplication(), !log.isLowReplicationRollEnabled()); Index: hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/FSHLog.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/FSHLog.java (revision 1526237) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/FSHLog.java (working copy) @@ -27,7 +27,6 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; -import java.util.LinkedHashSet; import java.util.LinkedList; import java.util.List; import java.util.Map; @@ -40,6 +39,7 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.locks.ReentrantLock; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -50,12 +50,12 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Syncable; -import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.regionserver.RegionCoprocessorHost; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.ClassSize; @@ -125,7 +125,6 @@ private volatile long syncedTillHere = 0; private long lastDeferredTxid; private final Path oldLogDir; - private volatile boolean logRollRunning; private WALCoprocessorHost coprocessorHost; @@ -162,7 +161,7 @@ * This lock makes sure only one log roll runs at the same time. Should not be taken while * any other lock is held. We don't just use synchronized because that results in bogus and * tedious findbugs warning when it thinks synchronized controls writer thread safety */ - private final Object rollWriterLock = new Object(); + private final ReentrantLock rollWriterLock = new ReentrantLock(true); /** * Map of encoded region names to their most recent sequence/edit id in their memstore. @@ -473,7 +472,8 @@ @Override public byte [][] rollWriter(boolean force) throws FailedLogCloseException, IOException { - synchronized (rollWriterLock) { + rollWriterLock.lock(); + try { // Return if nothing to flush. if (!force && this.writer != null && this.numEntries.get() <= 0) { return null; @@ -484,7 +484,6 @@ return null; } try { - this.logRollRunning = true; if (!closeBarrier.beginOp()) { LOG.debug("HLog closing. Skipping rolling of writer"); return regionsToFlush; @@ -541,10 +540,11 @@ regionsToFlush = getRegionsToForceFlush(); } } finally { - this.logRollRunning = false; closeBarrier.endOp(); } return regionsToFlush; + } finally { + rollWriterLock.unlock(); } } @@ -1082,17 +1082,18 @@ this.syncedTillHere = Math.max(this.syncedTillHere, doneUpto); this.metrics.finishSync(EnvironmentEdgeManager.currentTimeMillis() - now); - // TODO: preserving the old behavior for now, but this check is strange. It's not - // protected by any locks here, so for all we know rolling locks might start - // as soon as we enter the "if". Is this best-effort optimization check? - if (!this.logRollRunning) { - checkLowReplication(); + if (rollWriterLock.tryLock()) { try { - if (tempWriter.getLength() > this.logrollsize) { - requestLogRoll(); + checkLowReplication(); + try { + if (tempWriter.getLength() > this.logrollsize) { + requestLogRoll(); + } + } catch (IOException x) { + LOG.debug("Log roll failed and will be retried. (This is not an error)"); } - } catch (IOException x) { - LOG.debug("Log roll failed and will be retried. (This is not an error)"); + } finally { + rollWriterLock.unlock(); } } } catch (IOException e) {