diff --git a/src/main/java/org/apache/hadoop/hbase/regionserver/CompactSplitThread.java b/src/main/java/org/apache/hadoop/hbase/regionserver/CompactSplitThread.java
index 7589db3..260f557 100644
--- a/src/main/java/org/apache/hadoop/hbase/regionserver/CompactSplitThread.java
+++ b/src/main/java/org/apache/hadoop/hbase/regionserver/CompactSplitThread.java
@@ -144,44 +144,69 @@ class CompactSplitThread extends Thread {
     }
   }
 
-  private void split(final HRegion region, final byte [] midKey)
+  private void split(final HRegion parent, final byte [] midKey)
   throws IOException {
-    final HRegionInfo oldRegionInfo = region.getRegionInfo();
+    final HRegionInfo parentRegionInfo = parent.getRegionInfo();
     final long startTime = System.currentTimeMillis();
-    final HRegion[] newRegions = region.splitRegion(midKey);
+
+    // TODO(WIP): HRegion#splitRegion is replaced in this patch by the
+    // prepareSplitRegionTransaction/executeSplitRegionTransaction pair over
+    // in HRegion.java; this call site must be updated to match.
+
+
+    final HRegion [] newRegions = parent.splitRegion(midKey);
     if (newRegions == null) {
       // Didn't need to be split
      return;
     }
 
-    // When a region is split, the META table needs to updated if we're
-    // splitting a 'normal' region, and the ROOT table needs to be
-    // updated if we are splitting a META region.
+    // Inform the HRegionServer that the parent HRegion is no-longer online.
+    this.server.removeFromOnlineRegions(parentRegionInfo);
+
     HTable t = null;
-    if (region.getRegionInfo().isMetaTable()) {
-      // We need to update the root region
-      if (this.root == null) {
-        this.root = new HTable(conf, HConstants.ROOT_TABLE_NAME);
-      }
-      t = root;
-    } else {
-      // For normal regions we need to update the meta region
-      if (meta == null) {
-        meta = new HTable(conf, HConstants.META_TABLE_NAME);
+    try {
+      t = getTable(parent);
+      Put put = createOfflineParentPut(parentRegionInfo, newRegions);
+      t.put(put);
+      // TODO: to recover here, need to add the parent back to the ONLINE
+      // regions and reset!
+      // If we crash here, then the daughters will not be added and we'll have
+      // an offlined parent but no daughters to take up the slack.  hbase-2244
+      // adds fixup to the metascanners.
+
+      // Add new regions to META
+      for (int i = 0; i < newRegions.length; i++) {
+        put = new Put(newRegions[i].getRegionName());
+        put.add(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER,
+          Writables.getBytes(newRegions[i].getRegionInfo()));
+        t.put(put);
      }
-      t = meta;
+    } catch (IOException e) {
+      // Rethrow: swallowing here would report a split that was never
+      // recorded in META.
+      throw e;
+    } finally {
+      // NOTE: do not close t -- it is the cached this.root/this.meta HTable,
+      // reused by subsequent splits.
    }
+    // Now tell the master about the new regions. If we fail here, it's OK.
+    // Basescanner will do fix up. And reporting split to master is going away.
+    this.server.reportSplit(parentRegionInfo, newRegions[0].getRegionInfo(),
+      newRegions[1].getRegionInfo());
+    LOG.info("Region split, META updated, and report to master. Parent=" +
+      parentRegionInfo.toString() + ", new regions: " + newRegions[0].toString() +
+      ", " + newRegions[1].toString() + ". Split took " +
+      StringUtils.formatTimeDiff(System.currentTimeMillis(), startTime));
+  }
+
+  private Put createOfflineParentPut(final HRegionInfo parentRegionInfo,
+    final HRegion [] newRegions) {
     // Mark old region as offline and split in META.
     // NOTE: there is no need for retry logic here. HTable does it for us.
-    oldRegionInfo.setOffline(true);
-    oldRegionInfo.setSplit(true);
-    // Inform the HRegionServer that the parent HRegion is no-longer online.
-    this.server.removeFromOnlineRegions(oldRegionInfo);
-
-    Put put = new Put(oldRegionInfo.getRegionName());
+    HRegionInfo editedParentRegionInfo =
+      new HRegionInfo(parentRegionInfo);
+    editedParentRegionInfo.setOffline(true);
+    editedParentRegionInfo.setSplit(true);
+    Put put = new Put(editedParentRegionInfo.getRegionName());
     put.add(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER,
-      Writables.getBytes(oldRegionInfo));
+      Writables.getBytes(editedParentRegionInfo));
     put.add(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER,
       HConstants.EMPTY_BYTE_ARRAY);
     put.add(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER,
@@ -190,33 +215,34 @@
       Writables.getBytes(newRegions[0].getRegionInfo()));
     put.add(HConstants.CATALOG_FAMILY, HConstants.SPLITB_QUALIFIER,
       Writables.getBytes(newRegions[1].getRegionInfo()));
-    t.put(put);
-
-    // If we crash here, then the daughters will not be added and we'll have
-    // and offlined parent but no daughters to take up the slack. hbase-2244
-    // adds fixup to the metascanners.
+    return put;
+  }
 
-    // Add new regions to META
-    for (int i = 0; i < newRegions.length; i++) {
-      put = new Put(newRegions[i].getRegionName());
-      put.add(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER,
-        Writables.getBytes(newRegions[i].getRegionInfo()));
-      t.put(put);
+  /*
+   * @param r Parent region we want to edit.
+   * @return An HTable instance against the meta table that holds passed
+   * <code>r</code>
+   * @throws IOException
+   */
+  private HTable getTable(final HRegion r) throws IOException {
+    // When a region is split, the META table needs to be updated if we're
+    // splitting a 'normal' region, and the ROOT table needs to be
+    // updated if we are splitting a META region.
+    HTable t = null;
+    if (r.getRegionInfo().isMetaTable()) {
+      // We need to update the root region
+      if (this.root == null) {
+        this.root = new HTable(conf, HConstants.ROOT_TABLE_NAME);
+      }
+      t = root;
+    } else {
+      // For normal regions we need to update the meta region
+      if (meta == null) {
+        meta = new HTable(conf, HConstants.META_TABLE_NAME);
+      }
+      t = meta;
     }
-
-    // If we crash here, the master will not know of the new daughters and they
-    // will not be assigned. The metascanner when it runs will notice and take
-    // care of assigning the new daughters.
-
-    // Now tell the master about the new regions
-    server.reportSplit(oldRegionInfo, newRegions[0].getRegionInfo(),
-      newRegions[1].getRegionInfo());
-
-    LOG.info("region split, META updated, and report to master all" +
-      " successful. Old region=" + oldRegionInfo.toString() +
-      ", new regions: " + newRegions[0].toString() + ", " +
-      newRegions[1].toString() + ". Split took " +
-      StringUtils.formatTimeDiff(System.currentTimeMillis(), startTime));
+    return t;
   }
 
   /**
diff --git a/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java b/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
index 6dc41a4..b2d109b 100644
--- a/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
+++ b/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
@@ -226,7 +226,6 @@ public class HRegion implements HeapSize { // , Writable{
 
   // Stop updates lock
   private final ReentrantReadWriteLock updatesLock = new ReentrantReadWriteLock();
-  private final Object splitLock = new Object();
   private boolean splitRequest;
 
   private final ReadWriteConsistencyControl rwcc =
@@ -468,70 +467,68 @@ public class HRegion implements HeapSize { // , Writable{
    *
    * @throws IOException e
    */
-  public List<StoreFile> close(final boolean abort) throws IOException {
+  public synchronized List<StoreFile> close(final boolean abort)
+  throws IOException {
     if (isClosed()) {
-      LOG.warn("region " + this + " already closed");
+      LOG.warn("Region " + this
+        + " already closed");
       return null;
     }
-    synchronized (splitLock) {
-      boolean wasFlushing = false;
-      synchronized (writestate) {
-        // Disable compacting and flushing by background threads for this
-        // region.
-        writestate.writesEnabled = false;
-        wasFlushing = writestate.flushing;
-        LOG.debug("Closing " + this + ": disabling compactions & flushes");
-        while (writestate.compacting || writestate.flushing) {
-          LOG.debug("waiting for" +
-            (writestate.compacting ? " compaction" : "") +
-            (writestate.flushing ?
-              (writestate.compacting ? "," : "") + " cache flush" :
-                "") + " to complete for region " + this);
-          try {
-            writestate.wait();
-          } catch (InterruptedException iex) {
-            // continue
-          }
+    boolean wasFlushing = false;
+    synchronized (writestate) {
+      // Disable compacting and flushing by background threads for this
+      // region.
+      writestate.writesEnabled = false;
+      wasFlushing = writestate.flushing;
+      LOG.debug("Closing " + this + ": disabling compactions & flushes");
+      while (writestate.compacting || writestate.flushing) {
+        LOG.debug("waiting for" +
+          (writestate.compacting ? " compaction" : "") +
+          (writestate.flushing ?
+            (writestate.compacting ? "," : "") + " cache flush" :
+              "") + " to complete for region " + this);
+        try {
+          writestate.wait();
+        } catch (InterruptedException iex) {
+          // continue
         }
       }
-      // If we were not just flushing, is it worth doing a preflush...one
-      // that will clear out of the bulk of the memstore before we put up
-      // the close flag?
-      if (!abort && !wasFlushing && worthPreFlushing()) {
-        LOG.info("Running close preflush of " + this.getRegionNameAsString());
-        internalFlushcache();
-      }
-      newScannerLock.writeLock().lock();
-      this.closing.set(true);
+    }
+    // If we were not just flushing, is it worth doing a preflush...one
+    // that will clear out of the bulk of the memstore before we put up
+    // the close flag?
+    if (!abort && !wasFlushing && worthPreFlushing()) {
+      LOG.info("Running close preflush of " + this.getRegionNameAsString());
+      internalFlushcache();
+    }
+    newScannerLock.writeLock().lock();
+    this.closing.set(true);
+    try {
+      splitsAndClosesLock.writeLock().lock();
+      LOG.debug("Updates disabled for region, no outstanding scanners on " + this);
       try {
-        splitsAndClosesLock.writeLock().lock();
-        LOG.debug("Updates disabled for region, no outstanding scanners on " +
-          this);
-        try {
-          // Write lock means no more row locks can be given out. Wait on
-          // outstanding row locks to come in before we close so we do not drop
-          // outstanding updates.
-          waitOnRowLocks();
-          LOG.debug("No more row locks outstanding on region " + this);
-
-          // Don't flush the cache if we are aborting
-          if (!abort) {
-            internalFlushcache();
-          }
+        // Write lock means no more row locks can be given out. Wait on
+        // outstanding row locks to come in before we close so we do not drop
+        // outstanding updates.
+        waitOnRowLocks();
+        LOG.debug("No more row locks outstanding on region " + this);
+
+        // Don't flush the cache if we are aborting
+        if (!abort) {
+          internalFlushcache();
+        }
 
-          List<StoreFile> result = new ArrayList<StoreFile>();
-          for (Store store: stores.values()) {
-            result.addAll(store.close());
-          }
-          this.closed.set(true);
-          LOG.info("Closed " + this);
-          return result;
-        } finally {
-          splitsAndClosesLock.writeLock().unlock();
+        List<StoreFile> result = new ArrayList<StoreFile>();
+        for (Store store: stores.values()) {
+          result.addAll(store.close());
         }
+        this.closed.set(true);
+        LOG.info("Closed " + this);
+        return result;
       } finally {
-        newScannerLock.writeLock().unlock();
+        splitsAndClosesLock.writeLock().unlock();
       }
+    } finally {
+      newScannerLock.writeLock().unlock();
     }
   }
 
@@ -621,85 +618,143 @@ public class HRegion implements HeapSize { // , Writable{
     return size;
   }
 
-  /*
-   * Split the HRegion to create two brand-new ones. This also closes
-   * current HRegion.  Split should be fast since we don't rewrite store files
-   * but instead create new 'reference' store files that read off the top and
-   * bottom ranges of parent store files.
-   * @param splitRow row on which to split region
-   * @return two brand-new HRegions or null if a split is not needed
+  /**
+   * Prepare to run the split region "transaction". Runs checks that we can
+   * actually split.
+   * Caller must hold the region split lock. If this method fails, no damage
+   * done, can continue.
+   * @param splitRow Row to split on.
+   * @return Prospective daughter HRegionInfos or null if setup failed (this
+   * region is closed, etc).
    * @throws IOException
+   * @see #executeSplitRegionTransaction(Pair, byte[])
+   * @see #rollbackSplitRegionTransaction(Pair)
    */
-  HRegion [] splitRegion(final byte [] splitRow) throws IOException {
+  Pair<HRegionInfo, HRegionInfo> prepareSplitRegionTransaction(final byte [] splitRow)
+  throws IOException {
     prepareToSplit();
-    synchronized (splitLock) {
-      if (closed.get()) {
-        return null;
-      }
-      // Add start/end key checking: hbase-428.
-      byte [] startKey = this.regionInfo.getStartKey();
-      byte [] endKey = this.regionInfo.getEndKey();
-      if (this.comparator.matchingRows(startKey, 0, startKey.length,
-          splitRow, 0, splitRow.length)) {
-        LOG.debug("Startkey and midkey are same, not splitting");
-        return null;
-      }
-      if (this.comparator.matchingRows(splitRow, 0, splitRow.length,
-          endKey, 0, endKey.length)) {
-        LOG.debug("Endkey and midkey are same, not splitting");
-        return null;
-      }
-      LOG.info("Starting split of region " + this);
-      Path splits = new Path(this.regiondir, SPLITDIR);
-      if(!this.fs.exists(splits)) {
-        this.fs.mkdirs(splits);
-      }
-      // Calculate regionid to use. Can't be less than that of parent else
-      // it'll insert into wrong location over in .META. table: HBASE-710.
-      long rid = EnvironmentEdgeManager.currentTimeMillis();
-      if (rid < this.regionInfo.getRegionId()) {
-        LOG.warn("Clock skew; parent regions id is " +
-          this.regionInfo.getRegionId() + " but current time here is " + rid);
-        rid = this.regionInfo.getRegionId() + 1;
-      }
-      HRegionInfo regionAInfo = new HRegionInfo(this.regionInfo.getTableDesc(),
-        startKey, splitRow, false, rid);
-      Path dirA = getSplitDirForDaughter(splits, regionAInfo);
-      HRegionInfo regionBInfo = new HRegionInfo(this.regionInfo.getTableDesc(),
-        splitRow, endKey, false, rid);
-      Path dirB = getSplitDirForDaughter(splits, regionBInfo);
-
-      // Now close the HRegion.  Close returns all store files or null if not
-      // supposed to close (? What to do in this case? Implement abort of close?)
-      // Close also does wait on outstanding rows and calls a flush just-in-case.
-      List<StoreFile> hstoreFilesToSplit = close(false);
-      if (hstoreFilesToSplit == null) {
-        LOG.warn("Close came back null (Implement abort of close?)");
-        throw new RuntimeException("close returned empty vector of HStoreFiles");
-      }
+    if (closed.get()) return null;
+    // Add start/end key checking: hbase-428.
+    byte [] startKey = this.regionInfo.getStartKey();
+    byte [] endKey = this.regionInfo.getEndKey();
+    if (this.comparator.matchingRows(startKey, 0, startKey.length,
+        splitRow, 0, splitRow.length)) {
+      LOG.debug("Startkey and midkey are same, not splitting");
+      return null;
+    }
+    if (this.comparator.matchingRows(splitRow, 0, splitRow.length,
+        endKey, 0, endKey.length)) {
+      LOG.debug("Endkey and midkey are same, not splitting");
+      return null;
+    }
+    long rid = getDaughterRegionIdTimestamp(this.regionInfo.getRegionId());
+    HRegionInfo a = new HRegionInfo(this.regionInfo.getTableDesc(),
+      startKey, splitRow, false, rid);
+    HRegionInfo b = new HRegionInfo(this.regionInfo.getTableDesc(),
+      splitRow, endKey, false, rid);
+    return new Pair<HRegionInfo, HRegionInfo>(a, b);
+  }
+
+  /*
+   * Calculate daughter regionid to use. Regionid is timestamp.  Can't be less
+   * than that of parent else will insert at wrong location in .META.
+   * (See HBASE-710).
+   * @param parentid Region id of the parent region.
+   * @return Daughter region id timestamp.
+   */
+  private long getDaughterRegionIdTimestamp(final long parentid) {
+    long rid = EnvironmentEdgeManager.currentTimeMillis();
+    if (rid < parentid) {
+      LOG.warn("Clock skew; parent regions id is " +
+        parentid + " but current time here is " + rid);
+      rid = parentid + 1;
+    }
+    return rid;
+  }
+
+  /**
+   * Run the split region transaction. If we fail this method, call the
+   * {@link #rollbackSplitRegionTransaction(Pair)}. Any changes done by this
+   * method must have a rollback equivalent registered in
+   * {@link #rollbackSplitRegionTransaction(Pair)}.
+   * Caller must hold the region split lock.
+   * @param hris Daughter HRegionInfos from prepareSplitRegionTransaction.
+   * @param splitRow Row to split around.
+   * @return two brand-new HRegions or null if a split is not needed
+   * @throws IOException
+   */
+  HRegion [] executeSplitRegionTransaction(final Pair<HRegionInfo, HRegionInfo> hris,
+    final byte [] splitRow)
+  throws IOException {
+    // 1. Create split dir
+    LOG.info("Starting split of region " + this);
+    Path splitdir = new Path(this.regiondir, SPLITDIR);
+    if(!this.fs.exists(splitdir)) {
+      this.fs.mkdirs(splitdir);
+    }
+    Path dirA = getSplitDirForDaughter(splitdir, hris.getFirst());
+    Path dirB = getSplitDirForDaughter(splitdir, hris.getSecond());
+
+    // 2. Now close parent region. Close returns all store files or null if
+    // not supposed to close.
+    List<StoreFile> hstoreFilesToSplit = close(false);
+    if (hstoreFilesToSplit == null) {
+      // If close doesn't succeed -- for now consider it fatal. Throw RE.
+      throw new RuntimeException("Close returned empty list of StoreFiles");
+    }
 
       // Split each store file.
-      for(StoreFile h: hstoreFilesToSplit) {
-        StoreFile.split(fs,
-          Store.getStoreHomedir(splits, regionAInfo.getEncodedName(),
-            h.getFamily()),
-          h, splitRow, Range.bottom);
-        StoreFile.split(fs,
-          Store.getStoreHomedir(splits, regionBInfo.getEncodedName(),
-            h.getFamily()),
-          h, splitRow, Range.top);
+    for (StoreFile sf: hstoreFilesToSplit) {
+      splitStoreFile(sf, splitdir, hris, splitRow);
     }
+    // 3. Create region dir for each daughter.
     // Create a region instance and then move the splits into place under
     // regionA and regionB.
-    HRegion regionA =
-      HRegion.newHRegion(tableDir, log, fs, conf, regionAInfo, null);
+    HRegion regionA = HRegion.newHRegion(tableDir, log, fs, conf,
+      hris.getFirst(), null);
     moveInitialFilesIntoPlace(this.fs, dirA, regionA.getRegionDir());
-    HRegion regionB =
-      HRegion.newHRegion(tableDir, log, fs, conf, regionBInfo, null);
+    HRegion regionB = HRegion.newHRegion(tableDir, log, fs, conf,
+      hris.getSecond(), null);
     moveInitialFilesIntoPlace(this.fs, dirB, regionB.getRegionDir());
     return new HRegion [] {regionA, regionB};
+  }
+
+  private void splitStoreFile(final StoreFile sf, final Path splitdir,
+    final Pair<HRegionInfo, HRegionInfo> hris, final byte [] splitRow)
+  throws IOException {
+    byte [] family = sf.getFamily();
+    String encoded = hris.getFirst().getEncodedName();
+    Path storedir = Store.getStoreHomedir(splitdir, encoded, family);
+    StoreFile.split(fs, storedir, sf, splitRow, Range.bottom);
+    encoded = hris.getSecond().getEncodedName();
+    storedir = Store.getStoreHomedir(splitdir, encoded, family);
+    StoreFile.split(fs, storedir, sf, splitRow, Range.top);
+  }
+
+  /**
+   * Undo failed {@link #executeSplitRegionTransaction(Pair, byte[])}.
+   * Caller must hold the region split lock. If we fail, must shutdown the
+   * hosting regionserver. Otherwise, could be left with regions offlined or
+   * not correctly registered in .META.
+   * @param hris Daughter HRegionInfos whose creation is being rolled back.
+   * @throws IOException
+   */
+  private void rollbackSplitRegionTransaction(final Pair<HRegionInfo, HRegionInfo> hris)
+  throws IOException {
+    // Rollback 3., creation of daughter regions. Delete anything in filesystem.
+    // TODO: delete what is in the filesystem for each daughter region.
+
+    // Rollback 2., close of the parent region -- reopen.
+    // TODO: reopen the parent region.
+
+    // Rollback 1., cleaning up what was done in the daughter dirs.
+    Path splits = new Path(this.regiondir, SPLITDIR);
+    if (this.fs.exists(splits)) {
+      if (!this.fs.delete(splits, true)) {
+        throw new IOException("Failed delete of " + splits);
+      }
    }
  }
diff --git a/src/test/java/org/apache/hadoop/hbase/io/TestImmutableBytesWritable.java b/src/test/java/org/apache/hadoop/hbase/io/TestImmutableBytesWritable.java
index 43fa6dd..77c4506 100644
--- a/src/test/java/org/apache/hadoop/hbase/io/TestImmutableBytesWritable.java
+++ b/src/test/java/org/apache/hadoop/hbase/io/TestImmutableBytesWritable.java
@@ -40,6 +40,13 @@ public class TestImmutableBytesWritable extends TestCase {
       new ImmutableBytesWritable(Bytes.toBytes("xxabc"), 2, 2).hashCode());
   }
 
+  public void testSpecificCompare() {
+    ImmutableBytesWritable ibw1 = new ImmutableBytesWritable(new byte[]{0x0f});
+    ImmutableBytesWritable ibw2 = new ImmutableBytesWritable(new byte[]{0x00, 0x00});
+    ImmutableBytesWritable.Comparator c = new ImmutableBytesWritable.Comparator();
+    assertFalse("ibw1 < ibw2", c.compare( ibw1, ibw2 ) < 0 );
+  }
+
   public void testComparison() throws Exception {
     runTests("aa", "b", -1);
     runTests("aa", "aa", 0);