Index: CHANGES.txt =================================================================== --- CHANGES.txt (revision 1081994) +++ CHANGES.txt (working copy) @@ -113,6 +113,8 @@ HBASE-2495 Allow record filtering with selected row key values in HBase Export (Subbu M Iyer via Stack) HBASE-3600 Update our jruby to 1.6.0 + HBASE-3440 Clean out load_table.rb and make sure all roads lead to + completebulkload tool (Vidhyashankar Venkataraman via Stack) TASK HBASE-3559 Move report of split to master OFF the heartbeat channel Index: src/test/java/org/apache/hadoop/hbase/mapreduce/TestLoadIncrementalHFiles.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/mapreduce/TestLoadIncrementalHFiles.java (revision 1081993) +++ src/test/java/org/apache/hadoop/hbase/mapreduce/TestLoadIncrementalHFiles.java (working copy) @@ -22,6 +22,8 @@ import static org.junit.Assert.assertEquals; import java.io.IOException; +import java.util.Arrays; +import java.util.TreeMap; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; @@ -186,4 +188,70 @@ writer.close(); } } + + /** Test helper: increments the Integer stored in map under first and decrements the one stored under last; a key not yet present counts as 0. */ private void addStartEndKeysForTest(TreeMap map, byte[] first, byte[] last) { + Integer value = map.containsKey(first)?(Integer)map.get(first):0; + map.put(first, value+1); + + value = map.containsKey(last)?(Integer)map.get(last):0; + map.put(last, value-1); + } + + /** Records nine [first, last] key ranges in shuffled order through addStartEndKeysForTest, passes the map to LoadIncrementalHFiles.inferBoundaries and checks that three keys come back; the compare array holds the expected keys m, r and u. */ @Test + public void testInferBoundaries() { + TreeMap map = new TreeMap(Bytes.BYTES_COMPARATOR); + + /* Toy example + * c---------i o------p s---------t v------x + * a------e g-----k m-------------q r----s u----w + * + * Should be inferred as: + * a-----------------k m-------------q r--------------t u---------x + * + * The output should be (m,r,u) + */ + + String first; + String last; + + first = "a"; last = "e"; + addStartEndKeysForTest(map, first.getBytes(), last.getBytes()); + + first = "r"; last = "s"; + addStartEndKeysForTest(map, first.getBytes(), last.getBytes()); + + 
first = "o"; last = "p"; + addStartEndKeysForTest(map, first.getBytes(), last.getBytes()); + + first = "g"; last = "k"; + addStartEndKeysForTest(map, first.getBytes(), last.getBytes()); + + first = "v"; last = "x"; + addStartEndKeysForTest(map, first.getBytes(), last.getBytes()); + + first = "c"; last = "i"; + addStartEndKeysForTest(map, first.getBytes(), last.getBytes()); + + first = "m"; last = "q"; + addStartEndKeysForTest(map, first.getBytes(), last.getBytes()); + + first = "s"; last = "t"; + addStartEndKeysForTest(map, first.getBytes(), last.getBytes()); + + first = "u"; last = "w"; + addStartEndKeysForTest(map, first.getBytes(), last.getBytes()); + + /* Expected result: the start key of every merged range from the toy example except the first one. */ byte[][] keysArray = LoadIncrementalHFiles.inferBoundaries(map); + byte[][] compare = new byte[3][]; + compare[0] = "m".getBytes(); + compare[1] = "r".getBytes(); + compare[2] = "u".getBytes(); + + assertEquals(keysArray.length, 3); + + for (int row = 0; row bdryMap) { + /* Sweep over the bdryMap entries in iteration order while keeping a running sum of their values. A sum of zero before an entry means that entry opens a new group; a sum of zero after it means the group is closed. */ ArrayList keysArray = new ArrayList(); + int runningValue = 0; + byte[] currStartKey = null; + boolean firstBoundary = true; + + for (Map.Entry item: bdryMap.entrySet()) { + if (runningValue == 0) currStartKey = item.getKey(); + runningValue += item.getValue(); + if (runningValue == 0) { + /* The start key of the first closed group is not collected; every later group contributes its start key. */ if (!firstBoundary) keysArray.add(currStartKey); + firstBoundary = false; + } + } + + return keysArray.toArray(new byte[0][0]); + } + + /* + * If the table is created for the first time, then "completebulkload" reads the files twice. + * More modifications necessary if we want to avoid doing it. 
+ */ + private void createTable(String tableName, String dirPath) throws Exception { + Path hfofDir = new Path(dirPath); + FileSystem fs = hfofDir.getFileSystem(getConf()); + if (!fs.exists(hfofDir)) { + throw new FileNotFoundException("HFileOutputFormat dir " + + hfofDir + " not found"); + } + + FileStatus[] familyDirStatuses = fs.listStatus(hfofDir); + if (familyDirStatuses == null) { + throw new FileNotFoundException("No families found in " + hfofDir); + } + + HTableDescriptor htd = new HTableDescriptor(tableName); + HColumnDescriptor hcd = null; + + // Add column families + // Build a set of keys + byte[][] keys = null; + TreeMap map = new TreeMap(Bytes.BYTES_COMPARATOR); + + for (FileStatus stat : familyDirStatuses) { + if (!stat.isDir()) { + LOG.warn("Skipping non-directory " + stat.getPath()); + continue; + } + Path familyDir = stat.getPath(); + // Skip _logs, etc + if (familyDir.getName().startsWith("_")) continue; + byte[] family = familyDir.getName().getBytes(); + + hcd = new HColumnDescriptor(family); + htd.addFamily(hcd); + + Path[] hfiles = FileUtil.stat2Paths(fs.listStatus(familyDir)); + for (Path hfile : hfiles) { + if (hfile.getName().startsWith("_")) continue; + + HFile.Reader reader = new HFile.Reader(fs, hfile, null, false, false); + final byte[] first, last; + try { + reader.loadFileInfo(); + first = reader.getFirstRowKey(); + last = reader.getLastRowKey(); + + LOG.info("Trying to figure out region boundaries hfile=" + hfile + + " first=" + Bytes.toStringBinary(first) + + " last=" + Bytes.toStringBinary(last)); + + // To eventually infer start key-end key boundaries + Integer value = map.containsKey(first)?(Integer)map.get(first):0; + map.put(first, value+1); + + value = map.containsKey(last)?(Integer)map.get(last):0; + map.put(last, value-1); + } finally { + reader.close(); + } + } + } + + keys = LoadIncrementalHFiles.inferBoundaries(map); + try { + this.hbAdmin.createTableAsync(htd, keys); + } catch (java.net.SocketTimeoutException e) { + 
System.err.println("Caught Socket timeout.. Mostly caused by a slow region assignment by master!"); + } + + HTable table = new HTable(tableName); + + HConnection conn = table.getConnection(); + int ctr = 0; + while (!conn.isTableAvailable(table.getTableName()) && (ctr