Index: hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/LoadIncrementalHFiles.java
===================================================================
--- hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/LoadIncrementalHFiles.java (revision 1572887)
+++ hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/LoadIncrementalHFiles.java (working copy)
@@ -490,6 +490,7 @@
    * LQI's corresponding to the resultant hfiles.
    *
    * protected for testing
+   * @throws IOException
    */
   protected List<LoadQueueItem> groupOrSplit(Multimap<ByteBuffer, LoadQueueItem> regionGroups,
       final LoadQueueItem item, final HTable table,
@@ -530,6 +531,21 @@
       idx = -(idx + 1) - 1;
     }
     final int indexForCallable = idx;
+
+    /**
+     * We can consider there to be a region hole in the following cases:
+     * 1) idx < 0: the first region info is missing;
+     * 2) the end key of a region is not equal to the start key of the next region;
+     * 3) the end key of the last region is not empty.
+     */
+    if (indexForCallable < 0
+        || (indexForCallable == startEndKeys.getFirst().length - 1)
+        && !Bytes.equals(startEndKeys.getSecond()[indexForCallable], HConstants.EMPTY_BYTE_ARRAY)
+        || indexForCallable + 1 < startEndKeys.getFirst().length
+        && !(Bytes.compareTo(startEndKeys.getSecond()[indexForCallable],
+          startEndKeys.getFirst()[indexForCallable + 1]) == 0)) {
+      throw new IOException("Table " + Bytes.toString(table.getTableName())
+          + " has a region hole (inconsistent state). Please use the hbck tool to fix it first.");
+    }
     boolean lastKeyInRange =
       Bytes.compareTo(last, startEndKeys.getSecond()[idx]) < 0 ||
       Bytes.equals(startEndKeys.getSecond()[idx], HConstants.EMPTY_BYTE_ARRAY);
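The hole check added above boils down to three comparisons over the sorted (start key, end key) pairs that HTable.getStartEndKeys() returns. For reference, here is a minimal standalone sketch of the same decision, assuming plain Java with no HBase dependencies; hasHoleAt and EMPTY are illustrative names, not part of the patch:

import java.util.Arrays;

public class RegionHoleCheck {
  private static final byte[] EMPTY = new byte[0]; // stand-in for HConstants.EMPTY_BYTE_ARRAY

  /**
   * A hole exists at idx when:
   * 1) idx < 0 -- the row sorts before the first known region;
   * 2) idx is the last region but its end key is not empty;
   * 3) the end key at idx does not match the start key at idx + 1.
   */
  static boolean hasHoleAt(byte[][] startKeys, byte[][] endKeys, int idx) {
    if (idx < 0) {
      return true;
    }
    if (idx == startKeys.length - 1) {
      return !Arrays.equals(endKeys[idx], EMPTY); // last region must be open-ended
    }
    return !Arrays.equals(endKeys[idx], startKeys[idx + 1]); // adjacent regions must abut
  }

  public static void main(String[] args) {
    // Regions ["", "10"), ["10", "20"), ["30", ""): the ["20", "30") region is missing.
    byte[][] starts = { new byte[0], "10".getBytes(), "30".getBytes() };
    byte[][] ends = { "10".getBytes(), "20".getBytes(), new byte[0] };
    System.out.println(hasHoleAt(starts, ends, 1)); // true: "20" != "30"
    System.out.println(hasHoleAt(starts, ends, 0)); // false: "10" == "10"
  }
}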
"groupOrSplitHoleExist"; + byte[][][] hFileRanges = + new byte[][][] { new byte[][] { Bytes.toBytes("20"), Bytes.toBytes("30") } }; + Path dir = util.getDataTestDirOnTestFS(testName); + FileSystem fs = util.getTestFileSystem(); + dir = dir.makeQualified(fs); + Path familyDir = new Path(dir, Bytes.toString(FAMILY)); + + int hFileIdx = 0; + byte[] value = value(2); + for (byte[][] range : hFileRanges) { + byte[] from = range[0]; + byte[] to = range[1]; + TestHRegionServerBulkLoad.createHFileForGivenRange(util.getConfiguration(), fs, new Path( + familyDir, "hfile_" + hFileIdx++), FAMILY, QUALIFIER, from, to, value); + } + + final byte[] TABLE = Bytes.toBytes(testName); + HBaseAdmin admin = new HBaseAdmin(util.getConfiguration()); + HTableDescriptor htd = new HTableDescriptor(TableName.valueOf(TABLE)); + // set real family name to upper case in purpose to simulate the case that + // family name in HFiles is invalid + HColumnDescriptor family = new HColumnDescriptor(Bytes.toBytes(new String(FAMILY))); + htd.addFamily(family); + admin.createTable(htd, SPLIT_KEYS); + + HTable table = new HTable(util.getConfiguration(), TABLE); + util.waitTableEnabled(TABLE); + final AtomicInteger countedLqis = new AtomicInteger(); + LoadIncrementalHFiles loader = new LoadIncrementalHFiles(util.getConfiguration()) { + protected List groupOrSplit( + Multimap regionGroups, + final LoadQueueItem item, final HTable htable, + final Pair startEndKeys) throws IOException { + List lqis = super.groupOrSplit(regionGroups, item, htable, startEndKeys); + if (lqis != null) { + countedLqis.addAndGet(lqis.size()); + } + return lqis; + } + }; + + this.deleteMetaInfo(testName, Bytes.toBytes("20")); + + try { + loader.doBulkLoad(dir, table); + } catch (Exception e) { + LOG.error("exeception=", e); + } + + table.close(); + admin.close(); + + this.assertExpectedTable(testName, 0, 2); + } + + private void deleteMetaInfo(String testName, byte[] bytes) { + // TODO Implement TestLoadIncrementalHFilesSplitRecovery.deleteMetaInfo + // Now let's mess it up and delete the regioninfo in hbase:meta to make it inconsistent. 
Index: hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegionServerBulkLoad.java
===================================================================
--- hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegionServerBulkLoad.java (revision 1572887)
+++ hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegionServerBulkLoad.java (working copy)
@@ -116,6 +116,33 @@
     }
   }
 
+  /**
+   * Creates an HFile holding one KeyValue per row in the range [startKey, endKey),
+   * where both keys are numeric strings such as "20".
+   */
+  public static void createHFileForGivenRange(Configuration configuration, FileSystem fs,
+      Path path, byte[] family, byte[] qualifier, byte[] startKey, byte[] endKey, byte[] value)
+      throws IOException {
+    HFileContext meta =
+        new HFileContextBuilder().withBlockSize(TestHRegionServerBulkLoad.BLOCKSIZE)
+            .withCompression(TestHRegionServerBulkLoad.COMPRESSION).build();
+    HFile.Writer writer =
+        HFile.getWriterFactory(configuration, new CacheConfig(configuration)).withPath(fs, path)
+            .withFileContext(meta).create();
+    long now = System.currentTimeMillis();
+    try {
+      int start = Integer.valueOf(Bytes.toString(startKey));
+      int end = Integer.valueOf(Bytes.toString(endKey));
+      for (int i = start; i < end; i++) {
+        KeyValue kv =
+            new KeyValue(Bytes.toBytes(String.format("%2d", i)), family, qualifier, now, value);
+        writer.append(kv);
+      }
+    } finally {
+      writer.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY, Bytes.toBytes(System.currentTimeMillis()));
+      writer.close();
+    }
+  }
+
   /**
    * Thread that does full scans of the table looking for any partially
    * completed rows.
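One detail worth noting in createHFileForGivenRange(): HBase compares row keys as raw bytes, so numeric string keys only sort numerically when they are fixed-width, which is why the helper formats rows with String.format("%2d", i) and the tests stay within two-digit keys. A small standalone illustration with made-up values:

import java.util.Arrays;

public class KeyOrdering {
  public static void main(String[] args) {
    // Unpadded numeric keys sort lexicographically, not numerically.
    String[] unpadded = { "9", "20", "100" };
    Arrays.sort(unpadded);
    System.out.println(Arrays.toString(unpadded)); // [100, 20, 9]

    // Zero-padded, fixed-width keys keep byte order aligned with numeric order.
    String[] padded = { "009", "020", "100" };
    Arrays.sort(padded);
    System.out.println(Arrays.toString(padded)); // [009, 020, 100]
  }
}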