From ce99e56d418aaf66f7f9a2af9b54a638aedf6f5b Mon Sep 17 00:00:00 2001 From: zhangduo Date: Sat, 22 Oct 2016 14:42:26 +0800 Subject: [PATCH] HBASE-15684 Fix the broken log file size accounting --- .../hbase/regionserver/wal/AbstractFSWAL.java | 59 +++++++++++++++------- .../hadoop/hbase/wal/AbstractFSWALProvider.java | 10 +++- .../regionserver/wal/AbstractTestLogRolling.java | 32 +++++++----- 3 files changed, 69 insertions(+), 32 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/AbstractFSWAL.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/AbstractFSWAL.java index e33ae33..c1e8019 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/AbstractFSWAL.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/AbstractFSWAL.java @@ -60,6 +60,7 @@ import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.DrainBarrier; import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; import org.apache.hadoop.hbase.util.FSUtils; +import org.apache.hadoop.hbase.util.Pair; import org.apache.hadoop.hbase.wal.WAL; import org.apache.hadoop.hbase.wal.WALFactory; import org.apache.hadoop.hbase.wal.WALKey; @@ -222,12 +223,31 @@ public abstract class AbstractFSWAL<W> implements WAL { } }; + private static final class WalProps { + + /** + * Map the encoded region name to the highest sequence id. Contains all the regions it has entries of. + */ + public final Map<byte[], Long> encodedName2HighestSequenceId; + + /** + * The log file size. Notice that the size may not be accurate if we do asynchronous close in + * sub classes. + */ + public final long logSize; + + public WalProps(Map<byte[], Long> encodedName2HighestSequenceId, long logSize) { + this.encodedName2HighestSequenceId = encodedName2HighestSequenceId; + this.logSize = logSize; + } + } + /** - * Map of WAL log file to the latest sequence ids of all regions it has entries of. 
The map is - * sorted by the log file creation timestamp (contained in the log file name). + * Map of WAL log file to properties. The map is sorted by the log file creation timestamp + * (contained in the log file name). */ - protected ConcurrentNavigableMap<Path, Map<byte[], Long>> byWalRegionSequenceIds = - new ConcurrentSkipListMap<Path, Map<byte[], Long>>(LOG_NAME_COMPARATOR); + protected ConcurrentNavigableMap<Path, WalProps> walFile2Props = new ConcurrentSkipListMap<>( + LOG_NAME_COMPARATOR); /** * Map of {@link SyncFuture}s keyed by Handler objects. Used so we reuse SyncFutures. @@ -503,7 +523,7 @@ public abstract class AbstractFSWAL<W> implements WAL { // public only until class moves to o.a.h.h.wal /** @return the number of rolled log files */ public int getNumRolledLogFiles() { - return byWalRegionSequenceIds.size(); + return walFile2Props.size(); } // public only until class moves to o.a.h.h.wal @@ -523,8 +543,9 @@ public abstract class AbstractFSWAL<W> implements WAL { byte[][] regions = null; int logCount = getNumRolledLogFiles(); if (logCount > this.maxLogs && logCount > 0) { - Map.Entry<Path, Map<byte[], Long>> firstWALEntry = this.byWalRegionSequenceIds.firstEntry(); - regions = this.sequenceIdAccounting.findLower(firstWALEntry.getValue()); + Map.Entry<Path, WalProps> firstWALEntry = this.walFile2Props.firstEntry(); + regions = this.sequenceIdAccounting + .findLower(firstWALEntry.getValue().encodedName2HighestSequenceId); } if (regions != null) { StringBuilder sb = new StringBuilder(); @@ -544,27 +565,27 @@ public abstract class AbstractFSWAL<W> implements WAL { * Archive old logs. A WAL is eligible for archiving if all its WALEdits have been flushed. */ private void cleanOldLogs() throws IOException { - List<Path> logsToArchive = null; + List<Pair<Path, Long>> logsToArchive = null; // For each log file, look at its Map of regions to highest sequence id; if all sequence ids // are older than what is currently in memory, the WAL can be GC'd. 
- for (Map.Entry<Path, Map<byte[], Long>> e : this.byWalRegionSequenceIds.entrySet()) { + for (Map.Entry<Path, WalProps> e : this.walFile2Props.entrySet()) { Path log = e.getKey(); - Map<byte[], Long> sequenceNums = e.getValue(); + Map<byte[], Long> sequenceNums = e.getValue().encodedName2HighestSequenceId; if (this.sequenceIdAccounting.areAllLower(sequenceNums)) { if (logsToArchive == null) { - logsToArchive = new ArrayList<Path>(); + logsToArchive = new ArrayList<>(); } - logsToArchive.add(log); + logsToArchive.add(Pair.newPair(log, e.getValue().logSize)); if (LOG.isTraceEnabled()) { LOG.trace("WAL file ready for archiving " + log); } } } if (logsToArchive != null) { - for (Path p : logsToArchive) { - this.totalLogSize.addAndGet(-this.fs.getFileStatus(p).getLen()); - archiveLogFile(p); - this.byWalRegionSequenceIds.remove(p); + for (Pair<Path, Long> logAndSize : logsToArchive) { + this.totalLogSize.addAndGet(-logAndSize.getSecond()); + archiveLogFile(logAndSize.getFirst()); + this.walFile2Props.remove(logAndSize.getFirst()); } } } @@ -617,12 +638,12 @@ public abstract class AbstractFSWAL<W> implements WAL { Path replaceWriter(Path oldPath, Path newPath, W nextWriter) throws IOException { TraceScope scope = Trace.startSpan("FSHFile.replaceWriter"); try { - long oldFileLen = 0L; - doReplaceWriter(oldPath, newPath, nextWriter); + long oldFileLen = doReplaceWriter(oldPath, newPath, nextWriter); int oldNumEntries = this.numEntries.get(); final String newPathString = (null == newPath ? 
null : FSUtils.getPath(newPath)); if (oldPath != null) { - this.byWalRegionSequenceIds.put(oldPath, this.sequenceIdAccounting.resetHighest()); + this.walFile2Props.put(oldPath, + new WalProps(this.sequenceIdAccounting.resetHighest(), oldFileLen)); this.totalLogSize.addAndGet(oldFileLen); LOG.info("Rolled WAL " + FSUtils.getPath(oldPath) + " with entries=" + oldNumEntries + ", filesize=" + StringUtils.byteDesc(oldFileLen) + "; new WAL " + newPathString); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/AbstractFSWALProvider.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/AbstractFSWALProvider.java index 51d7417..76a6415 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/AbstractFSWALProvider.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/AbstractFSWALProvider.java @@ -169,7 +169,7 @@ public abstract class AbstractFSWALProvider> implemen /** * iff the given WALFactory is using the DefaultWALProvider for meta and/or non-meta, count the - * size of files (rolled and active). if either of them aren't, count 0 for that provider. + * size of files (only rolled). if either of them aren't, count 0 for that provider. */ @Override public long getLogFileSize() { @@ -186,6 +186,14 @@ public abstract class AbstractFSWALProvider> implemen } /** + * returns the size of rolled WAL files. + */ + @VisibleForTesting + public static long getLogFileSize(WAL wal) { + return ((AbstractFSWAL) wal).getLogFileSize(); + } + + /** * return the current filename from the current wal. 
*/ @VisibleForTesting diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/wal/AbstractTestLogRolling.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/wal/AbstractTestLogRolling.java index 046071f..1aa077d 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/wal/AbstractTestLogRolling.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/wal/AbstractTestLogRolling.java @@ -46,7 +46,6 @@ import org.apache.hadoop.hbase.regionserver.Store; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.Threads; import org.apache.hadoop.hbase.wal.AbstractFSWALProvider; -import org.apache.hadoop.hbase.wal.FSHLogProvider; import org.apache.hadoop.hbase.wal.WAL; import org.apache.hadoop.hbase.wal.WALFactory; import org.apache.hadoop.hdfs.MiniDFSCluster; @@ -172,6 +171,14 @@ public abstract class AbstractTestLogRolling { } } + private void assertLogFileSize(WAL log) { + if (AbstractFSWALProvider.getNumRolledLogFiles(log) > 0) { + assertTrue(AbstractFSWALProvider.getLogFileSize(log) > 0); + } else { + assertEquals(0, AbstractFSWALProvider.getLogFileSize(log)); + } + } + /** * Tests that logs are deleted * @throws IOException @@ -182,23 +189,24 @@ public abstract class AbstractTestLogRolling { this.tableName = getName(); // TODO: Why does this write data take for ever? 
startAndWriteData(); - HRegionInfo region = - server.getOnlineRegions(TableName.valueOf(tableName)).get(0).getRegionInfo(); + HRegionInfo region = server.getOnlineRegions(TableName.valueOf(tableName)).get(0) + .getRegionInfo(); final WAL log = server.getWAL(region); - LOG.info("after writing there are " + AbstractFSWALProvider.getNumRolledLogFiles(log) + - " log files"); + LOG.info("after writing there are " + AbstractFSWALProvider.getNumRolledLogFiles(log) + " log files"); + assertLogFileSize(log); - // flush all regions - for (Region r: server.getOnlineRegionsLocalContext()) { - r.flush(true); - } + // flush all regions + for (Region r : server.getOnlineRegionsLocalContext()) { + r.flush(true); + } - // Now roll the log - log.rollWriter(); + // Now roll the log + log.rollWriter(); int count = AbstractFSWALProvider.getNumRolledLogFiles(log); LOG.info("after flushing all regions and rolling logs there are " + count + " log files"); - assertTrue(("actual count: " + count), count <= 2); + assertTrue(("actual count: " + count), count <= 2); + assertLogFileSize(log); } protected String getName() { -- 1.9.1