diff --git a/src/main/java/org/apache/hadoop/hbase/regionserver/CompactSplitThread.java b/src/main/java/org/apache/hadoop/hbase/regionserver/CompactSplitThread.java
index 7589db3..260f557 100644
--- a/src/main/java/org/apache/hadoop/hbase/regionserver/CompactSplitThread.java
+++ b/src/main/java/org/apache/hadoop/hbase/regionserver/CompactSplitThread.java
@@ -144,44 +144,69 @@ class CompactSplitThread extends Thread {
}
}
- private void split(final HRegion region, final byte [] midKey)
+ private void split(final HRegion parent, final byte [] midKey)
throws IOException {
- final HRegionInfo oldRegionInfo = region.getRegionInfo();
+ final HRegionInfo parentRegionInfo = parent.getRegionInfo();
final long startTime = System.currentTimeMillis();
- final HRegion[] newRegions = region.splitRegion(midKey);
+
+ // NOTE(review): HRegion.splitRegion() is deleted elsewhere in this patch, so
+ // the old call no longer compiles. Drive the split through the new
+ // prepare/execute split-transaction API. A null pair from prepare means the
+ // region does not need splitting (handled by the null check below).
+ final Pair hris = parent.prepareSplitRegionTransaction(midKey);
+ final HRegion [] newRegions = hris == null? null: parent.executeSplitRegionTransaction(hris, midKey);
if (newRegions == null) {
// Didn't need to be split
return;
}
- // When a region is split, the META table needs to updated if we're
- // splitting a 'normal' region, and the ROOT table needs to be
- // updated if we are splitting a META region.
+ // Inform the HRegionServer that the parent HRegion is no longer online.
+ this.server.removeFromOnlineRegions(parentRegionInfo);
+
HTable t = null;
- if (region.getRegionInfo().isMetaTable()) {
- // We need to update the root region
- if (this.root == null) {
- this.root = new HTable(conf, HConstants.ROOT_TABLE_NAME);
- }
- t = root;
- } else {
- // For normal regions we need to update the meta region
- if (meta == null) {
- meta = new HTable(conf, HConstants.META_TABLE_NAME);
+ try {
+ t = getTable(parent);
+ Put put = createOfflineParentPut(parentRegionInfo, newRegions);
+ t.put(put);
+ // TODO: to recover here we need to add the parent back to ONLINE regions and reset its state.
+
+ // If we crash here, then the daughters will not be added and we'll have
+ // an offlined parent but no daughters to take up the slack. hbase-2244
+ // adds fixup to the metascanners.
+
+ // Add new regions to META
+ for (int i = 0; i < newRegions.length; i++) {
+ put = new Put(newRegions[i].getRegionName());
+ put.add(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER,
+ Writables.getBytes(newRegions[i].getRegionInfo()));
+ t.put(put);
}
- t = meta;
+ } catch (IOException e) {
+ // Do not swallow: an offlined parent with no daughter rows in META must be
+ // surfaced (recovery TODO above; hbase-2244 metascanner fixup is the net).
+ throw e;
+ } finally {
+ // Intentionally NOT closing t here: getTable() returns the cached
+ // this.root / this.meta instance, which later splits will reuse.
+ }
+ // Now tell the master about the new regions. If we fail here, its OK.
+ // Basescanner will do fix up. And reporting split to master is going away.
+ this.server.reportSplit(parentRegionInfo, newRegions[0].getRegionInfo(),
+ newRegions[1].getRegionInfo());
+ LOG.info("Region split, META updated, and report to master. Parent=" +
+ parentRegionInfo.toString() + ", new regions: " + newRegions[0].toString() +
+ ", " + newRegions[1].toString() + ". Split took " +
+ StringUtils.formatTimeDiff(System.currentTimeMillis(), startTime));
+ }
+
+ private Put createOfflineParentPut(final HRegionInfo parentRegionInfo,
+ final HRegion [] newRegions) {
// Mark old region as offline and split in META.
// NOTE: there is no need for retry logic here. HTable does it for us.
- oldRegionInfo.setOffline(true);
- oldRegionInfo.setSplit(true);
- // Inform the HRegionServer that the parent HRegion is no-longer online.
- this.server.removeFromOnlineRegions(oldRegionInfo);
-
- Put put = new Put(oldRegionInfo.getRegionName());
+ HRegionInfo editedParentRegionInfo =
+ new HRegionInfo(parentRegionInfo);
+ editedParentRegionInfo.setOffline(true);
+ editedParentRegionInfo.setSplit(true);
+ Put put = new Put(editedParentRegionInfo.getRegionName());
put.add(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER,
- Writables.getBytes(oldRegionInfo));
+ Writables.getBytes(editedParentRegionInfo));
put.add(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER,
HConstants.EMPTY_BYTE_ARRAY);
put.add(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER,
@@ -190,33 +215,34 @@ class CompactSplitThread extends Thread {
Writables.getBytes(newRegions[0].getRegionInfo()));
put.add(HConstants.CATALOG_FAMILY, HConstants.SPLITB_QUALIFIER,
Writables.getBytes(newRegions[1].getRegionInfo()));
- t.put(put);
-
- // If we crash here, then the daughters will not be added and we'll have
- // and offlined parent but no daughters to take up the slack. hbase-2244
- // adds fixup to the metascanners.
+ return put;
+ }
- // Add new regions to META
- for (int i = 0; i < newRegions.length; i++) {
- put = new Put(newRegions[i].getRegionName());
- put.add(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER,
- Writables.getBytes(newRegions[i].getRegionInfo()));
- t.put(put);
+ /*
+ * @param r Parent region we want to edit.
+ * @return An HTable instance against the meta table that holds passed
+ * r
+ * @throws IOException
+ */
+ private HTable getTable(final HRegion r) throws IOException {
+ // When a region is split, the META table needs to be updated if we're
+ // splitting a 'normal' region, and the ROOT table needs to be
+ // updated if we are splitting a META region.
+ HTable t = null;
+ if (r.getRegionInfo().isMetaTable()) {
+ // We need to update the root region
+ if (this.root == null) {
+ this.root = new HTable(conf, HConstants.ROOT_TABLE_NAME);
+ }
+ t = root;
+ } else {
+ // For normal regions we need to update the meta region
+ if (meta == null) {
+ meta = new HTable(conf, HConstants.META_TABLE_NAME);
+ }
+ t = meta;
}
-
- // If we crash here, the master will not know of the new daughters and they
- // will not be assigned. The metascanner when it runs will notice and take
- // care of assigning the new daughters.
-
- // Now tell the master about the new regions
- server.reportSplit(oldRegionInfo, newRegions[0].getRegionInfo(),
- newRegions[1].getRegionInfo());
-
- LOG.info("region split, META updated, and report to master all" +
- " successful. Old region=" + oldRegionInfo.toString() +
- ", new regions: " + newRegions[0].toString() + ", " +
- newRegions[1].toString() + ". Split took " +
- StringUtils.formatTimeDiff(System.currentTimeMillis(), startTime));
+ return t;
}
/**
diff --git a/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java b/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
index 6dc41a4..b2d109b 100644
--- a/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
+++ b/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
@@ -226,7 +226,6 @@ public class HRegion implements HeapSize { // , Writable{
// Stop updates lock
private final ReentrantReadWriteLock updatesLock =
new ReentrantReadWriteLock();
- private final Object splitLock = new Object();
private boolean splitRequest;
private final ReadWriteConsistencyControl rwcc =
@@ -468,70 +467,68 @@ public class HRegion implements HeapSize { // , Writable{
*
* @throws IOException e
*/
- public List close(final boolean abort) throws IOException {
+ public synchronized List close(final boolean abort)
+ throws IOException {
if (isClosed()) {
- LOG.warn("region " + this + " already closed");
+ LOG.warn("Region " + this + " already closed");
return null;
}
- synchronized (splitLock) {
- boolean wasFlushing = false;
- synchronized (writestate) {
- // Disable compacting and flushing by background threads for this
- // region.
- writestate.writesEnabled = false;
- wasFlushing = writestate.flushing;
- LOG.debug("Closing " + this + ": disabling compactions & flushes");
- while (writestate.compacting || writestate.flushing) {
- LOG.debug("waiting for" +
- (writestate.compacting ? " compaction" : "") +
- (writestate.flushing ?
- (writestate.compacting ? "," : "") + " cache flush" :
- "") + " to complete for region " + this);
- try {
- writestate.wait();
- } catch (InterruptedException iex) {
- // continue
- }
+ boolean wasFlushing = false;
+ synchronized (writestate) {
+ // Disable compacting and flushing by background threads for this
+ // region.
+ writestate.writesEnabled = false;
+ wasFlushing = writestate.flushing;
+ LOG.debug("Closing " + this + ": disabling compactions & flushes");
+ while (writestate.compacting || writestate.flushing) {
+ LOG.debug("waiting for" +
+ (writestate.compacting ? " compaction" : "") +
+ (writestate.flushing ?
+ (writestate.compacting ? "," : "") + " cache flush" :
+ "") + " to complete for region " + this);
+ try {
+ writestate.wait();
+ } catch (InterruptedException iex) {
+ // continue
}
}
- // If we were not just flushing, is it worth doing a preflush...one
- // that will clear out of the bulk of the memstore before we put up
- // the close flag?
- if (!abort && !wasFlushing && worthPreFlushing()) {
- LOG.info("Running close preflush of " + this.getRegionNameAsString());
- internalFlushcache();
- }
- newScannerLock.writeLock().lock();
- this.closing.set(true);
+ }
+ // If we were not just flushing, is it worth doing a preflush...one
+ // that will clear out of the bulk of the memstore before we put up
+ // the close flag?
+ if (!abort && !wasFlushing && worthPreFlushing()) {
+ LOG.info("Running close preflush of " + this.getRegionNameAsString());
+ internalFlushcache();
+ }
+ newScannerLock.writeLock().lock();
+ this.closing.set(true);
+ try {
+ splitsAndClosesLock.writeLock().lock();
+ LOG.debug("Updates disabled for region, no outstanding scanners on " + this);
try {
- splitsAndClosesLock.writeLock().lock();
- LOG.debug("Updates disabled for region, no outstanding scanners on " +
- this);
- try {
- // Write lock means no more row locks can be given out. Wait on
- // outstanding row locks to come in before we close so we do not drop
- // outstanding updates.
- waitOnRowLocks();
- LOG.debug("No more row locks outstanding on region " + this);
-
- // Don't flush the cache if we are aborting
- if (!abort) {
- internalFlushcache();
- }
+ // Write lock means no more row locks can be given out. Wait on
+ // outstanding row locks to come in before we close so we do not drop
+ // outstanding updates.
+ waitOnRowLocks();
+ LOG.debug("No more row locks outstanding on region " + this);
+
+ // Don't flush the cache if we are aborting
+ if (!abort) {
+ internalFlushcache();
+ }
- List result = new ArrayList();
- for (Store store: stores.values()) {
- result.addAll(store.close());
- }
- this.closed.set(true);
- LOG.info("Closed " + this);
- return result;
- } finally {
- splitsAndClosesLock.writeLock().unlock();
+ List result = new ArrayList();
+ for (Store store: stores.values()) {
+ result.addAll(store.close());
}
+ this.closed.set(true);
+ LOG.info("Closed " + this);
+ return result;
} finally {
- newScannerLock.writeLock().unlock();
+ splitsAndClosesLock.writeLock().unlock();
}
+ } finally {
+ newScannerLock.writeLock().unlock();
}
}
@@ -621,85 +618,143 @@ public class HRegion implements HeapSize { // , Writable{
return size;
}
- /*
- * Split the HRegion to create two brand-new ones. This also closes
- * current HRegion. Split should be fast since we don't rewrite store files
- * but instead create new 'reference' store files that read off the top and
- * bottom ranges of parent store files.
- * @param splitRow row on which to split region
- * @return two brand-new HRegions or null if a split is not needed
+ /**
+ * Prepare to run the split region "transaction". Runs checks that we can
+ * actually split.
+ * Callers must not run concurrently (the splitLock field is removed by this
+ * done, can continue.
+ * @param splitRow Row to split on.
+ * @return Prospective daughter HRegionInfos or null if setup failed (this
+ * region is closed, etc).
* @throws IOException
+ * @see #executeSplitRegionTransaction
+ * @see #rollbackSplitRegionTransaction
*/
- HRegion [] splitRegion(final byte [] splitRow) throws IOException {
+ Pair prepareSplitRegionTransaction(final byte [] splitRow)
+ throws IOException {
prepareToSplit();
- synchronized (splitLock) {
- if (closed.get()) {
- return null;
- }
- // Add start/end key checking: hbase-428.
- byte [] startKey = this.regionInfo.getStartKey();
- byte [] endKey = this.regionInfo.getEndKey();
- if (this.comparator.matchingRows(startKey, 0, startKey.length,
- splitRow, 0, splitRow.length)) {
- LOG.debug("Startkey and midkey are same, not splitting");
- return null;
- }
- if (this.comparator.matchingRows(splitRow, 0, splitRow.length,
- endKey, 0, endKey.length)) {
- LOG.debug("Endkey and midkey are same, not splitting");
- return null;
- }
- LOG.info("Starting split of region " + this);
- Path splits = new Path(this.regiondir, SPLITDIR);
- if(!this.fs.exists(splits)) {
- this.fs.mkdirs(splits);
- }
- // Calculate regionid to use. Can't be less than that of parent else
- // it'll insert into wrong location over in .META. table: HBASE-710.
- long rid = EnvironmentEdgeManager.currentTimeMillis();
- if (rid < this.regionInfo.getRegionId()) {
- LOG.warn("Clock skew; parent regions id is " +
- this.regionInfo.getRegionId() + " but current time here is " + rid);
- rid = this.regionInfo.getRegionId() + 1;
- }
- HRegionInfo regionAInfo = new HRegionInfo(this.regionInfo.getTableDesc(),
- startKey, splitRow, false, rid);
- Path dirA = getSplitDirForDaughter(splits, regionAInfo);
- HRegionInfo regionBInfo = new HRegionInfo(this.regionInfo.getTableDesc(),
- splitRow, endKey, false, rid);
- Path dirB = getSplitDirForDaughter(splits, regionBInfo);
-
- // Now close the HRegion. Close returns all store files or null if not
- // supposed to close (? What to do in this case? Implement abort of close?)
- // Close also does wait on outstanding rows and calls a flush just-in-case.
- List hstoreFilesToSplit = close(false);
- if (hstoreFilesToSplit == null) {
- LOG.warn("Close came back null (Implement abort of close?)");
- throw new RuntimeException("close returned empty vector of HStoreFiles");
- }
+ if (closed.get()) return null;
+ // Add start/end key checking: hbase-428.
+ byte [] startKey = this.regionInfo.getStartKey();
+ byte [] endKey = this.regionInfo.getEndKey();
+ if (this.comparator.matchingRows(startKey, 0, startKey.length,
+ splitRow, 0, splitRow.length)) {
+ LOG.debug("Startkey and midkey are same, not splitting");
+ return null;
+ }
+ if (this.comparator.matchingRows(splitRow, 0, splitRow.length,
+ endKey, 0, endKey.length)) {
+ LOG.debug("Endkey and midkey are same, not splitting");
+ return null;
+ }
+ long rid = getDaughterRegionIdTimestamp(this.regionInfo.getRegionId());
+ HRegionInfo a = new HRegionInfo(this.regionInfo.getTableDesc(),
+ startKey, splitRow, false, rid);
+ HRegionInfo b = new HRegionInfo(this.regionInfo.getTableDesc(),
+ splitRow, endKey, false, rid);
+ return new Pair(a, b);
+ }
+
+ /*
+ * Calculate daughter regionid to use. Regionid is timestamp. Can't be less
+ * than that of parent else will insert at wrong location in .META.
+ * (See HBASE-710).
+ * @param parentid
+ * @return Daughter region id timestamp.
+ */
+ private long getDaughterRegionIdTimestamp(final long parentid) {
+ long rid = EnvironmentEdgeManager.currentTimeMillis();
+ if (rid < this.regionInfo.getRegionId()) {
+ LOG.warn("Clock skew; parent regions id is " +
+ this.regionInfo.getRegionId() + " but current time here is " + rid);
+ rid = this.regionInfo.getRegionId() + 1;
+ }
+ return rid;
+ }
+
+ /**
+ * Run the split region transaction. If we fail this method, call the
+ * {@link #rollbackSplitRegionTransaction}. Any changes done by this method
+ * must have a rollback equivalent registered in
+ * {@link #rollbackSplitRegionTransaction}.
+ * Callers must not run concurrently (the splitLock field is removed by this patch).
+ * @param hris
+ * @param splitRow Row to split around.
+ * @return two brand-new HRegions or null if a split is not needed
+ * @throws IOException
+ */
+ HRegion [] executeSplitRegionTransaction(final Pair hris,
+ final byte [] splitRow)
+ throws IOException {
+ // 1. Create split dir
+ LOG.info("Starting split of region " + this);
+ Path splitdir = new Path(this.regiondir, SPLITDIR);
+ if(!this.fs.exists(splitdir)) {
+ this.fs.mkdirs(splitdir);
+ }
+ Path dirA = getSplitDirForDaughter(splitdir, hris.getFirst());
+ Path dirB = getSplitDirForDaughter(splitdir, hris.getSecond());
+
+ // 2. Now close parent region. Close returns all store files or null if
+ // not supposed to close.
+ List hstoreFilesToSplit = close(false);
+ if (hstoreFilesToSplit == null) {
+ // If close doesn't succeed -- for now consider it fatal. Throw RE.
+ throw new RuntimeException("Close returned empty list of StoreFiles");
+ }
// Split each store file.
- for(StoreFile h: hstoreFilesToSplit) {
- StoreFile.split(fs,
- Store.getStoreHomedir(splits, regionAInfo.getEncodedName(),
- h.getFamily()),
- h, splitRow, Range.bottom);
- StoreFile.split(fs,
- Store.getStoreHomedir(splits, regionBInfo.getEncodedName(),
- h.getFamily()),
- h, splitRow, Range.top);
+ for (StoreFile sf: hstoreFilesToSplit) {
+ splitStoreFile(sf, splitdir, hris, splitRow);
}
+ // 3. Create region dir for each daughter.
// Create a region instance and then move the splits into place under
// regionA and regionB.
- HRegion regionA =
- HRegion.newHRegion(tableDir, log, fs, conf, regionAInfo, null);
+ HRegion regionA = HRegion.newHRegion(tableDir, log, fs, conf,
+ hris.getFirst(), null);
moveInitialFilesIntoPlace(this.fs, dirA, regionA.getRegionDir());
- HRegion regionB =
- HRegion.newHRegion(tableDir, log, fs, conf, regionBInfo, null);
+ HRegion regionB = HRegion.newHRegion(tableDir, log, fs, conf,
+ hris.getSecond(), null);
moveInitialFilesIntoPlace(this.fs, dirB, regionB.getRegionDir());
return new HRegion [] {regionA, regionB};
+ }
+
+ private void splitStoreFile(final StoreFile sf, final Path splitdir,
+ final Pair hris, final byte [] splitRow)
+ throws IOException {
+ byte [] family = sf.getFamily();
+ String encoded = hris.getFirst().getEncodedName();
+ Path storedir = Store.getStoreHomedir(splitdir, encoded, family);
+ StoreFile.split(fs, storedir, sf, splitRow, Range.bottom);
+ encoded = hris.getSecond().getEncodedName();
+ storedir = Store.getStoreHomedir(splitdir, encoded, family);
+ StoreFile.split(fs, storedir, sf, splitRow, Range.top);
+ }
+
+ /**
+ * Undo a failed {@link #executeSplitRegionTransaction}.
+ * Callers must not run concurrently (splitLock is removed). If we fail, must shutdown the
+ * hosting regionserver. Otherwise, could be left with regions offlined or
+ * not correctly registered in .META.
+ * @param hris
+ * @throws IOException
+ */
+ private void rollbackSplitRegionTransaction(final Pair hris)
+ throws IOException {
+ // Rollback 3., creation of daughter regions. Delete anything in filesystem.
+ // TODO(review): delete what is in the filesystem for each daughter region;
+ // the placeholder pseudo-code that was here was not valid Java.
+ // Rollback 2., close of the parent region -- reopen.
+ // TODO(review): reopen the parent region closed by step 2 of the
+ // transaction; until both TODOs are done this rollback is incomplete.
+ // Rollback 1., cleaning up what was done in the daughter dirs.
+ Path splits = new Path(this.regiondir, SPLITDIR);
+ if (this.fs.exists(splits)) {
+ if (!this.fs.delete(splits, true)) {
+ throw new IOException("Failed delete of " + splits);
+ }
}
}
diff --git a/src/test/java/org/apache/hadoop/hbase/io/TestImmutableBytesWritable.java b/src/test/java/org/apache/hadoop/hbase/io/TestImmutableBytesWritable.java
index 43fa6dd..77c4506 100644
--- a/src/test/java/org/apache/hadoop/hbase/io/TestImmutableBytesWritable.java
+++ b/src/test/java/org/apache/hadoop/hbase/io/TestImmutableBytesWritable.java
@@ -40,6 +40,13 @@ public class TestImmutableBytesWritable extends TestCase {
new ImmutableBytesWritable(Bytes.toBytes("xxabc"), 2, 2).hashCode());
}
+ public void testSpecificCompare() {
+ ImmutableBytesWritable ibw1 = new ImmutableBytesWritable(new byte[]{0x0f});
+ ImmutableBytesWritable ibw2 = new ImmutableBytesWritable(new byte[]{0x00, 0x00});
+ ImmutableBytesWritable.Comparator c = new ImmutableBytesWritable.Comparator();
+ assertFalse("ibw1 < ibw2", c.compare( ibw1, ibw2 ) < 0 );
+ }
+
public void testComparison() throws Exception {
runTests("aa", "b", -1);
runTests("aa", "aa", 0);