From 425bb2c01004afa8c49779433bf2bb59677b4305 Mon Sep 17 00:00:00 2001 From: Andrew Purtell Date: Thu, 6 Nov 2014 11:04:48 -0800 Subject: [PATCH 2/3] HBASE-12424 Finer grained logging and metrics for split transactions Record more details of the split transaction in the journal and dump them into the log at INFO level when complete (or failed) --- .../hbase/regionserver/RegionCoprocessorHost.java | 1 + .../hadoop/hbase/regionserver/SplitRequest.java | 2 + .../hbase/regionserver/SplitTransaction.java | 106 ++++++++++++++++++--- 3 files changed, 96 insertions(+), 13 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionCoprocessorHost.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionCoprocessorHost.java index 7e606e4..d5f1ff8 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionCoprocessorHost.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionCoprocessorHost.java @@ -567,6 +567,7 @@ public class RegionCoprocessorHost * Invoked just before a split * @throws IOException */ + // TODO: Deprecate this public void preSplit() throws IOException { execOperation(coprocessors.isEmpty() ? null : new RegionOperation() { @Override diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/SplitRequest.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/SplitRequest.java index e704d92..887b6ab 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/SplitRequest.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/SplitRequest.java @@ -136,6 +136,8 @@ class SplitRequest implements Runnable { + st.getSecondDaughter().getRegionNameAsString() + ". Split took " + StringUtils.formatTimeDiff(EnvironmentEdgeManager.currentTime(), startTime)); } + // Always log the split transaction journal + LOG.info("Split transaction journal:\n\t" + StringUtils.join("\n\t", st.getJournal())); } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/SplitTransaction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/SplitTransaction.java index 44a692f..4339fe0 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/SplitTransaction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/SplitTransaction.java @@ -96,7 +96,23 @@ public class SplitTransaction { * Each enum is a step in the split transaction. Used to figure how much * we need to rollback. */ - enum JournalEntry { + static enum JournalEntryType { + /** + * Started + */ + STARTED, + /** + * Prepared (after table lock) + */ + PREPARED, + /** + * Before preSplit coprocessor hook + */ + BEFORE_PRE_SPLIT_HOOK, + /** + * After preSplit coprocessor hook + */ + AFTER_PRE_SPLIT_HOOK, /** * Set region as in transition, set it into SPLITTING state. */ @@ -122,6 +138,22 @@ public class SplitTransaction { */ STARTED_REGION_B_CREATION, /** + * Opened the first daughter region + */ + OPENED_REGION_A, + /** + * Opened the second daughter region + */ + OPENED_REGION_B, + /** + * Before postSplit coprocessor hook + */ + BEFORE_POST_SPLIT_HOOK, + /** + * After postSplit coprocessor hook + */ + AFTER_POST_SPLIT_HOOK, + /** * Point of no return. * If we got here, then transaction is not recoverable other than by * crashing out the regionserver. @@ -129,6 +161,29 @@ public class SplitTransaction { PONR } + static class JournalEntry { + public JournalEntryType type; + public long timestamp; + + public JournalEntry(JournalEntryType type) { + this(type, System.currentTimeMillis()); + } + + public JournalEntry(JournalEntryType type, long timestamp) { + this.type = type; + this.timestamp = timestamp; + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append(type); + sb.append(" at "); + sb.append(timestamp); + return sb.toString(); + } + } + /* * Journal of how far the split transaction has progressed. */ @@ -142,6 +197,7 @@ public class SplitTransaction { public SplitTransaction(final HRegion r, final byte [] splitrow) { this.parent = r; this.splitrow = splitrow; + this.journal.add(new JournalEntry(JournalEntryType.STARTED)); } /** @@ -167,6 +223,7 @@ public class SplitTransaction { long rid = getDaughterRegionIdTimestamp(hri); this.hri_a = new HRegionInfo(hri.getTable(), startKey, this.splitrow, false, rid); this.hri_b = new HRegionInfo(hri.getTable(), this.splitrow, endKey, false, rid); + this.journal.add(new JournalEntry(JournalEntryType.PREPARED)); return true; } @@ -209,16 +266,17 @@ public class SplitTransaction { assert !this.parent.lock.writeLock().isHeldByCurrentThread(): "Unsafe to hold write lock while performing RPCs"; - // Coprocessor callback - if (this.parent.getCoprocessorHost() != null) { - this.parent.getCoprocessorHost().preSplit(); - } + journal.add(new JournalEntry(JournalEntryType.BEFORE_PRE_SPLIT_HOOK)); // Coprocessor callback if (this.parent.getCoprocessorHost() != null) { + // TODO: Remove one of these + this.parent.getCoprocessorHost().preSplit(); this.parent.getCoprocessorHost().preSplit(this.splitrow); } + journal.add(new JournalEntry(JournalEntryType.AFTER_PRE_SPLIT_HOOK)); + // If true, no cluster to write meta edits to or to update znodes in. boolean testing = server == null? true: server.getConfiguration().getBoolean("hbase.testing.nocluster", false); @@ -261,7 +319,7 @@ public class SplitTransaction { // OfflineParentInMeta timeout,this will cause regionserver exit,and then // master ServerShutdownHandler will fix daughter & avoid data loss. (See // HBase-4562). - this.journal.add(JournalEntry.PONR); + this.journal.add(new JournalEntry(JournalEntryType.PONR)); // Edit parent in meta. Offlines parent region and adds splita and splitb // as an atomic update. See HBASE-7721. This update to META makes the region @@ -284,10 +342,10 @@ public class SplitTransaction { throw new IOException("Failed to get ok from master to split " + parent.getRegionNameAsString()); } - this.journal.add(JournalEntry.SET_SPLITTING); + this.journal.add(new JournalEntry(JournalEntryType.SET_SPLITTING)); this.parent.getRegionFileSystem().createSplitsDir(); - this.journal.add(JournalEntry.CREATE_SPLIT_DIR); + this.journal.add(new JournalEntry(JournalEntryType.CREATE_SPLIT_DIR)); Map> hstoreFilesToSplit = null; Exception exceptionToThrow = null; @@ -305,7 +363,7 @@ public class SplitTransaction { exceptionToThrow = closedByOtherException; } if (exceptionToThrow != closedByOtherException) { - this.journal.add(JournalEntry.CLOSED_PARENT_REGION); + this.journal.add(new JournalEntry(JournalEntryType.CLOSED_PARENT_REGION)); } if (exceptionToThrow != null) { if (exceptionToThrow instanceof IOException) throw (IOException)exceptionToThrow; @@ -314,7 +372,7 @@ public class SplitTransaction { if (!testing) { services.removeFromOnlineRegions(this.parent, null); } - this.journal.add(JournalEntry.OFFLINED_PARENT); + this.journal.add(new JournalEntry(JournalEntryType.OFFLINED_PARENT)); // TODO: If splitStoreFiles were multithreaded would we complete steps in // less elapsed time? St.Ack 20100920 @@ -328,11 +386,11 @@ public class SplitTransaction { // region. We could fail halfway through. If we do, we could have left // stuff in fs that needs cleanup -- a storefile or two. Thats why we // add entry to journal BEFORE rather than AFTER the change. - this.journal.add(JournalEntry.STARTED_REGION_A_CREATION); + this.journal.add(new JournalEntry(JournalEntryType.STARTED_REGION_A_CREATION)); HRegion a = this.parent.createDaughterRegionFromSplits(this.hri_a); // Ditto - this.journal.add(JournalEntry.STARTED_REGION_B_CREATION); + this.journal.add(new JournalEntry(JournalEntryType.STARTED_REGION_B_CREATION)); HRegion b = this.parent.createDaughterRegionFromSplits(this.hri_b); return new PairOfSameType(a, b); } @@ -366,7 +424,13 @@ public class SplitTransaction { bOpener.start(); try { aOpener.join(); + if (aOpener.getException() == null) { + journal.add(new JournalEntry(JournalEntryType.OPENED_REGION_A)); + } bOpener.join(); + if (bOpener.getException() == null) { + journal.add(new JournalEntry(JournalEntryType.OPENED_REGION_B)); + } } catch (InterruptedException e) { throw (InterruptedIOException)new InterruptedIOException().initCause(e); } @@ -415,10 +479,12 @@ public class SplitTransaction { final RegionServerServices services, PairOfSameType regions) throws IOException { openDaughters(server, services, regions.getFirst(), regions.getSecond()); + journal.add(new JournalEntry(JournalEntryType.BEFORE_POST_SPLIT_HOOK)); // Coprocessor callback if (parent.getCoprocessorHost() != null) { parent.getCoprocessorHost().postSplit(regions.getFirst(), regions.getSecond()); } + journal.add(new JournalEntry(JournalEntryType.AFTER_POST_SPLIT_HOOK)); return regions; } @@ -616,7 +682,7 @@ public class SplitTransaction { // Iterate in reverse. while (iterator.hasPrevious()) { JournalEntry je = iterator.previous(); - switch(je) { + switch(je.type) { case SET_SPLITTING: if (services != null @@ -665,6 +731,17 @@ public class SplitTransaction { // See HBASE-3872. return false; + // Informational only cases + case STARTED: + case PREPARED: + case BEFORE_PRE_SPLIT_HOOK: + case AFTER_PRE_SPLIT_HOOK: + case BEFORE_POST_SPLIT_HOOK: + case AFTER_POST_SPLIT_HOOK: + case OPENED_REGION_A: + case OPENED_REGION_B: + break; + default: throw new RuntimeException("Unhandled journal entry: " + je); } @@ -684,4 +761,7 @@ public class SplitTransaction { return hri_b; } + List getJournal() { + return journal; + } } -- 1.7.12.4 (Apple Git-37)