Index: src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestDurability.java
===================================================================
--- src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestDurability.java	(revision 1544060)
+++ src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestDurability.java	(working copy)
@@ -145,7 +145,7 @@
     HLog.Entry entry = new HLog.Entry();
     while (reader.next(entry) != null) count++;
     reader.close();
-    assertEquals(expected, count);
+    assertEquals(expected, count - 1); // one extra, for the initial DUMMY entry
   }
 
   // lifted from TestAtomicOperation
Index: src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestHLog.java
===================================================================
--- src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestHLog.java	(revision 1544060)
+++ src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestHLog.java	(working copy)
@@ -26,6 +26,7 @@
 
 import java.io.IOException;
 import java.lang.reflect.Method;
+import java.util.Arrays;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -269,7 +270,7 @@
       int count = 0;
       HLog.Entry entry = new HLog.Entry();
       while ((entry = reader.next(entry)) != null) count++;
-      assertEquals(total, count);
+      assertEquals(total + 1, count); // one extra, for the initial DUMMY entry
       reader.close();
       // Add test that checks to see that an open of a Reader works on a file
       // that has had a sync done on it.
@@ -288,7 +289,7 @@
       reader = HLog.getReader(fs, walPath, conf);
       count = 0;
       while((entry = reader.next(entry)) != null) count++;
-      assertEquals(total * 2, count);
+      assertEquals(total * 2 + 1, count); // one extra, for the initial DUMMY entry
       // Now do a test that ensures stuff works when we go over block boundary,
       // especially that we return good length on file.
       final byte [] value = new byte[1025 * 1024]; // Make a 1M value.
@@ -302,14 +303,14 @@
       reader = HLog.getReader(fs, walPath, conf);
       count = 0;
       while((entry = reader.next(entry)) != null) count++;
-      assertEquals(total * 3, count);
+      assertEquals(total * 3 + 1, count); // one extra, for the initial DUMMY entry
       reader.close();
       // Close it and ensure that closed, Reader gets right length also.
       wal.close();
       reader = HLog.getReader(fs, walPath, conf);
       count = 0;
       while((entry = reader.next(entry)) != null) count++;
-      assertEquals(total * 3, count);
+      assertEquals(total * 3 + 1, count); // one extra, for the initial DUMMY entry
       reader.close();
     } finally {
       if (wal != null) wal.closeAndDelete();
@@ -488,6 +489,9 @@
       int count = 0;
       HLog.Entry entry = new HLog.Entry();
       while (reader.next(entry) != null) {
+        if (Arrays.equals(entry.getKey().getEncodedRegionName(), HLog.DUMMY)) {
+          continue;
+        }
         count++;
         assertTrue("Should be one KeyValue per WALEdit",
           entry.getEdit().getKeyValues().size() == 1);
@@ -537,17 +541,21 @@
       reader = HLog.getReader(fs, filename, conf);
       // Above we added all columns on a single row so we only read one
       // entry in the below... thats why we have '1'.
-      for (int i = 0; i < 1; i++) {
+      for (int count = 0; count < 1;) {
         HLog.Entry entry = reader.next(null);
         if (entry == null) break;
         HLogKey key = entry.getKey();
         WALEdit val = entry.getEdit();
+        if (Arrays.equals(key.getEncodedRegionName(), HLog.DUMMY)) {
+          continue;
+        }
         assertTrue(Bytes.equals(info.getEncodedNameAsBytes(), key.getEncodedRegionName()));
         assertTrue(Bytes.equals(tableName, key.getTablename()));
         KeyValue kv = val.getKeyValues().get(0);
         assertTrue(Bytes.equals(row, kv.getRow()));
-        assertEquals((byte)(i + '0'), kv.getValue()[0]);
+        assertEquals((byte)(count + '0'), kv.getValue()[0]);
         System.out.println(key + " " + val);
+        count++;
       }
       HLog.Entry entry = null;
       while ((entry = reader.next(null)) != null) {
@@ -605,6 +613,7 @@
       log = null;
       // Now open a reader on the log and assert append worked.
       reader = HLog.getReader(fs, filename, conf);
+      reader.next(); // skip the initial DUMMY entry
       HLog.Entry entry = reader.next();
       assertEquals(COL_COUNT, entry.getEdit().size());
       int idx = 0;
Index: src/test/java/org/apache/hadoop/hbase/regionserver/wal/HLogPerformanceEvaluation.java
===================================================================
--- src/test/java/org/apache/hadoop/hbase/regionserver/wal/HLogPerformanceEvaluation.java	(revision 1544060)
+++ src/test/java/org/apache/hadoop/hbase/regionserver/wal/HLogPerformanceEvaluation.java	(working copy)
@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.hbase.regionserver.wal;
 
+import java.util.Arrays;
 import java.util.Map;
 import java.util.List;
 import java.util.Random;
@@ -247,6 +248,9 @@
       while (true) {
         Entry e = reader.next();
         if (e == null) break;
+        if (Arrays.equals(e.getKey().getEncodedRegionName(), HLog.DUMMY)) {
+          continue;
+        }
         count++;
         long seqid = e.getKey().getLogSeqNum();
         if (verbose) LOG.info("seqid=" + seqid);
Index: src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLogSplitter.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLogSplitter.java	(revision 1544060)
+++ src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLogSplitter.java	(working copy)
@@ -26,6 +26,7 @@
 import java.lang.reflect.InvocationTargetException;
 import java.text.ParseException;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collections;
 import java.util.LinkedList;
 import java.util.List;
@@ -436,6 +437,9 @@
     try {
       while ((entry = getNextLogLine(in,logPath, skipErrors)) != null) {
         byte[] region = entry.getKey().getEncodedRegionName();
+        if (Arrays.equals(region, HLog.DUMMY)) {
+          continue;
+        }
         Object o = logWriters.get(region);
         if (o == BAD_WRITER) {
           continue;
@@ -901,6 +905,9 @@
      */
     void appendEntry(Entry entry) throws InterruptedException, IOException {
       HLogKey key = entry.getKey();
+      if (Arrays.equals(key.getEncodedRegionName(), HLog.DUMMY)) {
+        return;
+      }
       RegionEntryBuffer buffer;
       long incrHeap;
 
Index: src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java	(revision 1544060)
+++ src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java	(working copy)
@@ -67,6 +67,7 @@
 import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.ClassSize;
+import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
 import org.apache.hadoop.hbase.util.FSUtils;
 import org.apache.hadoop.hbase.util.HasThread;
 import org.apache.hadoop.hbase.util.Threads;
@@ -263,6 +264,7 @@
    */
  private static final Pattern pattern =
    Pattern.compile(".*\\.\\d*("+HLog.META_HLOG_FILE_EXTN+")*");
 
+  public static final byte[] DUMMY = Bytes.toBytes("");
   static byte [] COMPLETE_CACHE_FLUSH;
   static {
@@ -625,6 +627,18 @@
       FSDataOutputStream nextHdfsOut = null;
       if (nextWriter instanceof SequenceFileLogWriter) {
         nextHdfsOut = ((SequenceFileLogWriter)nextWriter).getWriterFSDataOutputStream();
+        // Append a dummy entry and sync it, so that the costly
+        // allocateBlock and sync happen before we take the lock to roll writers.
+        WALEdit edit = new WALEdit();
+        HLogKey key = makeKey(DUMMY /* regionName */, DUMMY /* tableName */,
+          0, EnvironmentEdgeManager.currentTimeMillis(), HConstants.DEFAULT_CLUSTER_ID);
+        try {
+          nextWriter.append(new HLog.Entry(key, edit));
+          nextWriter.sync();
+        } catch (IOException e) {
+          // The optimization failed; no need to abort here.
+          LOG.warn("Failed to write the DUMMY entry", e);
+        }
       }
 
       synchronized (updateLock) {
Index: src/main/java/org/apache/hadoop/hbase/mapreduce/HLogInputFormat.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/mapreduce/HLogInputFormat.java	(revision 1544060)
+++ src/main/java/org/apache/hadoop/hbase/mapreduce/HLogInputFormat.java	(working copy)
@@ -22,6 +22,7 @@
 import java.io.EOFException;
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;
 
@@ -167,7 +168,9 @@
           return false;
         }
       }
-      while(temp != null && temp.getKey().getWriteTime() < startTime);
+      while (temp != null
+          && (Arrays.equals(temp.getKey().getEncodedRegionName(), HLog.DUMMY)
+              || temp.getKey().getWriteTime() < startTime));
       if (temp == null) {
         if (i > 0) LOG.info("Skipped " + i + " entries.");
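
Reviewer note: every reader-side hunk above repeats the same idiom, comparing the
entry's encoded region name against HLog.DUMMY and skipping the marker entry that
the roll-time preallocation writes. A minimal sketch of that idiom pulled out as a
helper follows; DummyEntryFilter and both method names are hypothetical and not
part of this patch, and the sketch assumes the HLog.Entry and HLog.Reader APIs
already used in the hunks above.

import java.io.IOException;
import java.util.Arrays;

import org.apache.hadoop.hbase.regionserver.wal.HLog;

// Hypothetical helper (not part of this patch) showing the DUMMY-filter
// idiom the hunks above repeat at each WAL read site.
public final class DummyEntryFilter {
  private DummyEntryFilter() {}

  // The marker written at roll time carries HLog.DUMMY (an empty byte
  // array) as its encoded region name; real edits never do.
  public static boolean isDummy(HLog.Entry entry) {
    return Arrays.equals(entry.getKey().getEncodedRegionName(), HLog.DUMMY);
  }

  // Counts the real edits in a WAL, skipping the marker; this mirrors the
  // counting loops patched in TestHLog and HLogPerformanceEvaluation.
  public static int countRealEntries(HLog.Reader reader) throws IOException {
    int count = 0;
    HLog.Entry entry;
    while ((entry = reader.next()) != null) {
      if (isDummy(entry)) {
        continue;
      }
      count++;
    }
    return count;
  }
}

Centralizing the check in one helper like this would keep the DUMMY convention in
a single place if more WAL consumers are added later, instead of restating the
Arrays.equals comparison at every read site.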