Index: src/java/org/apache/hadoop/hbase/regionserver/CompactSplitThread.java =================================================================== --- src/java/org/apache/hadoop/hbase/regionserver/CompactSplitThread.java (revision 910299) +++ src/java/org/apache/hadoop/hbase/regionserver/CompactSplitThread.java (working copy) @@ -199,6 +199,10 @@ Writables.getBytes(newRegions[1].getRegionInfo())); t.put(put); + // If we crash here, then the daughters will not be added and we'll have + // an offlined parent but no daughters to take up the slack. hbase-2244 + // adds fixup to the metascanners. + // Add new regions to META for (int i = 0; i < newRegions.length; i++) { put = new Put(newRegions[i].getRegionName()); @@ -206,17 +210,20 @@ newRegions[i].getRegionInfo())); t.put(put); } - + + // If we crash here, the master will not know of the new daughters and they + // will not be assigned. The metascanner when it runs will notice and take + // care of assigning the new daughters. + // Now tell the master about the new regions server.reportSplit(oldRegionInfo, newRegions[0].getRegionInfo(), newRegions[1].getRegionInfo()); + LOG.info("region split, META updated, and report to master all" + " successful. Old region=" + oldRegionInfo.toString() + ", new regions: " + newRegions[0].toString() + ", " + newRegions[1].toString() + ". Split took " + StringUtils.formatTimeDiff(System.currentTimeMillis(), startTime)); - - // Do not serve the new regions. Let the Master assign them. } /** Index: src/java/org/apache/hadoop/hbase/regionserver/HRegion.java =================================================================== --- src/java/org/apache/hadoop/hbase/regionserver/HRegion.java (revision 910299) +++ src/java/org/apache/hadoop/hbase/regionserver/HRegion.java (working copy) @@ -268,7 +268,7 @@ this.regiondir = new Path(basedir, encodedNameStr); if (LOG.isDebugEnabled()) { // Write out region name as string and its encoded name. 
- LOG.debug("Opening region " + this + ", encoded=" + + LOG.debug("Creating region " + this + ", encoded=" + this.regionInfo.getEncodedName()); } this.regionCompactionDir = @@ -295,11 +295,7 @@ throws IOException { Path oldLogFile = new Path(regiondir, HREGION_OLDLOGFILE_NAME); - // Move prefab HStore files into place (if any). This picks up split files - // and any merges from splits and merges dirs. - if (initialFiles != null && fs.exists(initialFiles)) { - fs.rename(initialFiles, this.regiondir); - } + moveInitialFilesIntoPlace(this.fs, initialFiles, this.regiondir); // Write HRI to a file in case we need to recover .META. checkRegioninfoOnFilesystem(); @@ -334,7 +330,9 @@ // Add one to the current maximum sequence id so new edits are beyond. this.minSequenceId = maxSeqId + 1; - // Get rid of any splits or merges that were lost in-progress + // Get rid of any splits or merges that were lost in-progress. Clean out + // these directories here on open. We may be opening a region that was + // being split but we crashed in the middle of it all. FSUtils.deleteDirectory(this.fs, new Path(regiondir, SPLITDIR)); FSUtils.deleteDirectory(this.fs, new Path(regiondir, MERGEDIR)); @@ -350,6 +348,20 @@ " available; sequence id is " + this.minSequenceId); } + /* + * Move any passed HStore files into place (if any). Used to pick up split + * files and any merges from splits and merges dirs. + * @param initialFiles + * @throws IOException + */ + private static void moveInitialFilesIntoPlace(final FileSystem fs, + final Path initialFiles, final Path regiondir) + throws IOException { + if (initialFiles != null && fs.exists(initialFiles)) { + fs.rename(initialFiles, regiondir); + } + } + /** * @return True if this region has references. */ @@ -588,7 +600,7 @@ * but instead create new 'reference' store files that read off the top and * bottom ranges of parent store files. 
* @param splitRow row on which to split region - * @return two brand-new (and open) HRegions or null if a split is not needed + * @return two brand-new HRegions or null if a split is not needed * @throws IOException */ HRegion [] splitRegion(final byte [] splitRow) throws IOException { @@ -625,18 +637,10 @@ } HRegionInfo regionAInfo = new HRegionInfo(this.regionInfo.getTableDesc(), startKey, splitRow, false, rid); - Path dirA = - new Path(splits, Integer.toString(regionAInfo.getEncodedName())); - if(fs.exists(dirA)) { - throw new IOException("Cannot split; target file collision at " + dirA); - } + Path dirA = createDaughterDirInSplitDir(splits, regionAInfo); HRegionInfo regionBInfo = new HRegionInfo(this.regionInfo.getTableDesc(), splitRow, endKey, false, rid); - Path dirB = - new Path(splits, Integer.toString(regionBInfo.getEncodedName())); - if(this.fs.exists(dirB)) { - throw new IOException("Cannot split; target file collision at " + dirB); - } + Path dirB = createDaughterDirInSplitDir(splits, regionBInfo); // Now close the HRegion. Close returns all store files or null if not // supposed to close (? What to do in this case? Implement abort of close?) @@ -659,26 +663,42 @@ h, splitRow, Range.top); } - // Done! - // Opening the region copies the splits files from the splits directory - // under each region. - HRegion regionA = HRegion.newHRegion(basedir, log, fs, conf, regionAInfo, null); - regionA.initialize(dirA, null); - regionA.close(); - HRegion regionB = HRegion.newHRegion(basedir, log, fs, conf, regionBInfo, null); - regionB.initialize(dirB, null); - regionB.close(); + // Create a region instance and then move the splits into place under + // regionA and regionB. 
+ HRegion regionA = + HRegion.newHRegion(basedir, log, fs, conf, regionAInfo, null); + moveInitialFilesIntoPlace(this.fs, dirA, regionA.getRegionDir()); + HRegion regionB = + HRegion.newHRegion(basedir, log, fs, conf, regionBInfo, null); + moveInitialFilesIntoPlace(this.fs, dirB, regionB.getRegionDir()); - // Cleanup - boolean deleted = fs.delete(splits, true); // Get rid of splits directory - if (LOG.isDebugEnabled()) { - LOG.debug("Cleaned up " + FSUtils.getPath(splits) + " " + deleted); - } HRegion regions[] = new HRegion [] {regionA, regionB}; return regions; } } - + + /* + * Create the daughter directories in the splits dir that is under the parent + * regions' directory. + * @param splits + * @param daughter + * @return + * @throws IOException + */ + private Path createDaughterDirInSplitDir(final Path splits, + final HRegionInfo daughter) + throws IOException { + Path d = + new Path(splits, Integer.toString(daughter.getEncodedName())); + if (fs.exists(d)) { + // This should never happen; the splits dir will be newly made when we + // come in here. Even if we crashed midway through a split, the reopen + // of the parent region clears out the dir in its initialize method. 
+ throw new IOException("Cannot split; target file collision at " + d); + } + return d; + } + protected void prepareToSplit() { // nothing } Index: src/java/org/apache/hadoop/hbase/master/BaseScanner.java =================================================================== --- src/java/org/apache/hadoop/hbase/master/BaseScanner.java (revision 910299) +++ src/java/org/apache/hadoop/hbase/master/BaseScanner.java (working copy) @@ -40,6 +40,7 @@ import org.apache.hadoop.hbase.UnknownScannerException; import org.apache.hadoop.hbase.client.Delete; import org.apache.hadoop.hbase.client.Get; +import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.ipc.HRegionInterface; @@ -48,6 +49,7 @@ import org.apache.hadoop.hbase.regionserver.StoreFile; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.Writables; +import org.apache.hadoop.io.BooleanWritable; import org.apache.hadoop.ipc.RemoteException; @@ -100,7 +102,22 @@ */ abstract class BaseScanner extends Chore implements HConstants { static final Log LOG = LogFactory.getLog(BaseScanner.class.getName()); - + // These are names of new columns in a meta region offlined parent row. They + // are added by the metascanner after we verify that split daughter made it + // in. Their value is 'true' if present. + private static final byte [] SPLITA_CHECKED = + Bytes.toBytes(Bytes.toString(SPLITA_QUALIFIER) + "_checked"); + private static final byte [] SPLITB_CHECKED = + Bytes.toBytes(Bytes.toString(SPLITB_QUALIFIER) + "_checked"); + // Make the 'true' Writable once only. 
+ private static byte [] TRUE_WRITABLE_AS_BYTES; + static { + try { + TRUE_WRITABLE_AS_BYTES = Writables.getBytes(new BooleanWritable(true)); + } catch (IOException e) { + e.printStackTrace(); + } + } private final boolean rootRegion; protected final HMaster master; @@ -147,9 +164,9 @@ region.toString()); // Array to hold list of split parents found. Scan adds to list. After - // scan we go check if parents can be removed. - Map splitParents = - new HashMap(); + // scan we go check if parents can be removed and that their daughters + // are in place. + Map splitParents = new HashMap(); List emptyRows = new ArrayList(); int rows = 0; try { @@ -213,12 +230,13 @@ emptyRows); } - // Take a look at split parents to see if any we can clean up. - + // Take a look at split parents to see if we can clean up any and to + // make sure that daughter regions are in place. if (splitParents.size() > 0) { for (Map.Entry e : splitParents.entrySet()) { HRegionInfo hri = e.getKey(); - cleanupSplits(region.getRegionName(), regionServer, hri, e.getValue()); + cleanupAndVerifySplits(region.getRegionName(), regionServer, + hri, e.getValue()); } } LOG.info(Thread.currentThread().getName() + " scan of " + rows + @@ -260,24 +278,28 @@ /* * If daughters no longer hold reference to the parents, delete the parent. - * @param metaRegionName Meta region name. + * If the parent is lone without daughter splits AND there are references in + * the filesystem, then the daughters were not added to .META. -- must have been + * a crash before their addition. Add them here. + * @param metaRegionName Meta region name: e.g. .META.,,1 * @param server HRegionInterface of meta server to talk to - * @param parent HRegionInfo of split parent + * @param parent HRegionInfo of split offlined parent * @param rowContent Content of parent row in * metaRegionName * @return True if we removed parent from meta table and from * the filesystem. 
* @throws IOException */ - private boolean cleanupSplits(final byte [] metaRegionName, + private boolean cleanupAndVerifySplits(final byte [] metaRegionName, final HRegionInterface srvr, final HRegionInfo parent, Result rowContent) throws IOException { boolean result = false; - boolean hasReferencesA = hasReferences(metaRegionName, srvr, - parent.getRegionName(), rowContent, CATALOG_FAMILY, SPLITA_QUALIFIER); - boolean hasReferencesB = hasReferences(metaRegionName, srvr, - parent.getRegionName(), rowContent, CATALOG_FAMILY, SPLITB_QUALIFIER); + // Run checks on each daughter split. + boolean hasReferencesA = checkDaughter(metaRegionName, srvr, + parent, rowContent, SPLITA_QUALIFIER); + boolean hasReferencesB = checkDaughter(metaRegionName, srvr, + parent, rowContent, SPLITB_QUALIFIER); if (!hasReferencesA && !hasReferencesB) { LOG.info("Deleting region " + parent.getRegionNameAsString() + " (encoded=" + parent.getEncodedName() + @@ -289,26 +311,176 @@ } return result; } + + /* + * See if the passed daughter has references in the filesystem to the parent + * and if not, remove the note of daughter region in the parent row: its + * column info:splitA or info:splitB. Also make sure that daughter row is + * present in the .META. and mark the parent row when confirmed so we don't + * keep checking. The mark will be info:splitA_checked and its value will be + * a true BooleanWritable. + * @param metaRegionName + * @param srvr + * @param parent + * @param rowContent + * @param daughter + * @return True if this daughter still has references to the parent. 
+ * @throws IOException + */ + private boolean checkDaughter(final byte [] metaRegionName, + final HRegionInterface srvr, final HRegionInfo parent, + final Result rowContent, final byte [] daughter) + throws IOException { + HRegionInfo hri = getDaughterRegionInfo(rowContent, daughter); + boolean references = hasReferences(metaRegionName, srvr, + parent.getRegionName(), rowContent, hri, daughter); + if (references) { + if (!verifyDaughterRowPresent(rowContent, daughter, srvr, + metaRegionName, hri)) { + // If we got here, we added a daughter region to metatable. Update + // parent row that daughter has been verified present so we don't check + // for it by doing a get each time through here. + addDaughterRowPresent(metaRegionName, srvr, parent.getRegionName(), + hri, daughter); + } + } else { + removeDaughterFromParent(metaRegionName, srvr, parent.getRegionName(), + hri, daughter); + } + return references; + } + + /* + * Check the daughter of parent is present in meta table. If not there, + * add it and mark the parent row that we've verified its presence. + * @param rowContent + * @param daughter + * @param srvr + * @param metaRegionName + * @param daughterHRI + * @throws IOException + * @return If true, the daughter is present. If not, and no exceptions, we + * just added it to meta. + */ + private boolean verifyDaughterRowPresent(final Result rowContent, + final byte [] daughter, final HRegionInterface srvr, + final byte [] metaRegionName, + final HRegionInfo daughterHRI) + throws IOException { + boolean present = getDaughterRowPresent(rowContent, daughter); + if (present) return present; + byte [] daughterRowKey = daughterHRI.getRegionName(); + Get g = new Get(daughterRowKey); + g.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER); + Result r = srvr.get(metaRegionName, g); + if (r == null || r.isEmpty()) { + // Daughter row not present. Insert it. 
+ LOG.warn("Doing fixup on parent daughter because not present: " + + daughterHRI.toString()); + Put p = new Put(daughterRowKey); + p.add(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER, + Writables.getBytes(daughterHRI)); + srvr.put(metaRegionName, p); + } + return present; + } + + /* + * Add to parent a marker that we verified the daughter exists. + * @param metaRegionName + * @param srvr + * @param parent + * @param split + * @param daughter + * @throws IOException + */ + private void addDaughterRowPresent(final byte [] metaRegionName, + final HRegionInterface srvr, final byte [] parent, + final HRegionInfo split, final byte [] daughter) + throws IOException { + Put p = new Put(parent); + p.add(CATALOG_FAMILY, getNameOfVerifiedDaughterColumn(daughter), + TRUE_WRITABLE_AS_BYTES); + srvr.put(metaRegionName, p); + } + + /* + * @param rowContent + * @param which + * @return True if the daughter row has been verified present -- it'll be + * one of the columns in the offlined parent row. + * @throws IOException + */ + private boolean getDaughterRowPresent(final Result rowContent, + final byte [] which) + throws IOException { + BooleanWritable bw = + (BooleanWritable)Writables.getWritable(rowContent.getValue(CATALOG_FAMILY, + getNameOfVerifiedDaughterColumn(which)), + new BooleanWritable()); + return bw == null? false: bw.get(); + } + + private static byte [] getNameOfVerifiedDaughterColumn(final byte [] daughter) { + return Bytes.equals(SPLITA_QUALIFIER, daughter)? 
+ SPLITA_CHECKED: SPLITB_CHECKED; + } + + /* + * @param rowContent + * @param which Whether "info:splitA" or "info:splitB" column + * @return Deserialized content of the info:splitA or info:splitB as a + * HRegionInfo + * @throws IOException + */ + private HRegionInfo getDaughterRegionInfo(final Result rowContent, + final byte [] which) + throws IOException { + return Writables.getHRegionInfoOrNull(rowContent.getValue(CATALOG_FAMILY, which)); + } + + /* + * This daughter no longer references parent. Remove mention of it from the + * parent row. + * @param metaRegionName + * @param srvr + * @param parent + * @param split + * @param daughter + * @throws IOException + */ + private void removeDaughterFromParent(final byte [] metaRegionName, + final HRegionInterface srvr, final byte [] parent, + final HRegionInfo split, final byte [] daughter) + throws IOException { + if (LOG.isDebugEnabled()) { + LOG.debug(split.getRegionNameAsString() + "/" + split.getEncodedName() + + " no longer has references to " + Bytes.toStringBinary(parent)); + } + Delete delete = new Delete(parent); + delete.deleteColumns(HConstants.CATALOG_FAMILY, daughter); + srvr.delete(metaRegionName, delete); + } + /* * Checks if a daughter region -- either splitA or splitB -- still holds * references to parent. If not, removes reference to the split from - * the parent meta region row. + * the parent meta region row so we don't check it any more. * @param metaRegionName Name of meta region to look in. * @param srvr Where region resides. * @param parent Parent region name. * @param rowContent Keyed content of the parent row in meta region. - * @param splitColumn Column name of daughter split to examine + * @param split HRegionInfo of the daughter split; null if not in parent row. + * @param qualifier Which of the daughters to look at, splitA or splitB. * @return True if still has references to parent. 
* @throws IOException */ private boolean hasReferences(final byte [] metaRegionName, final HRegionInterface srvr, final byte [] parent, - Result rowContent, final byte [] splitFamily, byte [] splitQualifier) + Result rowContent, final HRegionInfo split, byte [] qualifier) throws IOException { boolean result = false; - HRegionInfo split = - Writables.getHRegionInfoOrNull(rowContent.getValue(splitFamily, splitQualifier)); if (split == null) { return result; } @@ -332,20 +504,6 @@ break; } } - - if (result) { - return result; - } - - if (LOG.isDebugEnabled()) { - LOG.debug(split.getRegionNameAsString() + "/" + split.getEncodedName() - + " no longer has references to " + Bytes.toStringBinary(parent)); - } - - Delete delete = new Delete(parent); - delete.deleteColumns(splitFamily, splitQualifier); - srvr.delete(metaRegionName, delete); - return result; }