Index: hbase-server/src/main/java/org/apache/hadoop/hbase/tool/Canary.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/tool/Canary.java (revision 1503079) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/tool/Canary.java (working copy) @@ -19,6 +19,7 @@ package org.apache.hadoop.hbase.tool; +import org.apache.commons.lang.time.StopWatch; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -36,6 +37,8 @@ import org.apache.hadoop.hbase.client.Get; import org.apache.hadoop.hbase.client.HTable; import org.apache.hadoop.hbase.client.HBaseAdmin; +import org.apache.hadoop.hbase.client.ResultScanner; +import org.apache.hadoop.hbase.client.Scan; /** * HBase Canary Tool, that that can be used to do @@ -47,23 +50,23 @@ public final class Canary implements Tool { // Sink interface used by the canary to outputs information public interface Sink { - void publishReadFailure(HRegionInfo region); - void publishReadFailure(HRegionInfo region, HColumnDescriptor column); + void publishReadFailure(HRegionInfo region, Exception e); + void publishReadFailure(HRegionInfo region, HColumnDescriptor column, Exception e); void publishReadTiming(HRegionInfo region, HColumnDescriptor column, long msTime); } // Simple implementation of canary sink that allows to plot on // file or standard output timings or failures. 
public static class StdOutSink implements Sink { @Override - public void publishReadFailure(HRegionInfo region) { - LOG.error(String.format("read from region %s failed", region.getRegionNameAsString())); + public void publishReadFailure(HRegionInfo region, Exception e) { + LOG.error(String.format("read from region %s failed", region.getRegionNameAsString()), e); } @Override - public void publishReadFailure(HRegionInfo region, HColumnDescriptor column) { + public void publishReadFailure(HRegionInfo region, HColumnDescriptor column, Exception e) { LOG.error(String.format("read from region %s column family %s failed", - region.getRegionNameAsString(), column.getNameAsString())); + region.getRegionNameAsString(), column.getNameAsString()), e); } @Override @@ -150,24 +153,27 @@ // initialize HBase conf and admin if (conf == null) conf = HBaseConfiguration.create(); admin = new HBaseAdmin(conf); + try { + // Let the canary monitor the cluster; the loop repeats while interval > 0. + do { + if (admin.isAborted()) { + LOG.error("HBaseAdmin aborted"); + return(1); + } - // lets the canary monitor the cluster - do { - if (admin.isAborted()) { - LOG.error("HBaseAdmin aborted"); - return(1); - } - - if (tables_index >= 0) { - for (int i = tables_index; i < args.length; i++) { - sniff(args[i]); + if (tables_index >= 0) { + for (int i = tables_index; i < args.length; i++) { + sniff(admin, sink, args[i]); + } + } else { + sniff(); } - } else { - sniff(); - } - Thread.sleep(interval); - } while (interval > 0); + Thread.sleep(interval); + } while (interval > 0); + } finally { + this.admin.close(); + } return(0); } @@ -186,16 +192,32 @@ */ private void sniff() throws Exception { for (HTableDescriptor table : admin.listTables()) { - sniff(table); + sniff(admin, sink, table); } } - /* - * canary entry point to monitor specified table. + /** + * Canary entry point for the specified table; publishes to a new StdOutSink. 
+ * @param admin admin used to check table availability and to look up its descriptor + * @param tableName name of the table to probe + * @throws Exception any failure propagated from the admin calls or the probe */ - private void sniff(String tableName) throws Exception { + public static void sniff(final HBaseAdmin admin, String tableName) + throws Exception { + sniff(admin, new StdOutSink(), tableName); + } + + /** + * Canary entry point for specified table. + * @param admin admin used to check table availability and to look up its descriptor + * @param sink receiver of the read timings and read failures + * @param tableName name of the table to probe + * @throws Exception any failure propagated from the admin calls or the probe + */ + private static void sniff(final HBaseAdmin admin, final Sink sink, String tableName) + throws Exception { if (admin.isTableAvailable(tableName)) { - sniff(admin.getTableDescriptor(tableName.getBytes())); + sniff(admin, sink, admin.getTableDescriptor(tableName.getBytes())); } else { LOG.warn(String.format("Table %s is not available", tableName)); } @@ -205,7 +227,8 @@ * Loops over regions that owns this table, * and output some information abouts the state. */ - private void sniff(HTableDescriptor tableDesc) throws Exception { + private static void sniff(final HBaseAdmin admin, final Sink sink, HTableDescriptor tableDesc) + throws Exception { HTable table = null; try { @@ -216,9 +239,9 @@ for (HRegionInfo region : admin.getTableRegions(tableDesc.getName())) { try { - sniffRegion(region, table); + sniffRegion(admin, sink, region, table); } catch (Exception e) { - sink.publishReadFailure(region); + sink.publishReadFailure(region, e); } } } @@ -227,20 +250,43 @@ * For each column family of the region tries to get one row * and outputs the latency, or the failure. 
*/ - private void sniffRegion(HRegionInfo region, HTable table) throws Exception { + private static void sniffRegion(final HBaseAdmin admin, final Sink sink, HRegionInfo region, + HTable table) + throws Exception { HTableDescriptor tableDesc = table.getTableDescriptor(); + StopWatch stopWatch = new StopWatch(); for (HColumnDescriptor column : tableDesc.getColumnFamilies()) { - Get get = new Get(region.getStartKey()); - get.addFamily(column.getName()); - - try { - long startTime = System.currentTimeMillis(); - table.get(get); - long time = System.currentTimeMillis() - startTime; - - sink.publishReadTiming(region, column, time); - } catch (Exception e) { - sink.publishReadFailure(region, column); + stopWatch.reset(); + byte [] startKey = region.getStartKey(); + if (startKey == null || startKey.length <= 0) { + // Can't do a get on empty start row so do a Scan of first element if any instead. + Scan scan = new Scan(); + scan.addFamily(column.getName()); + scan.setBatch(1); + scan.setCaching(1); // fetch one result per RPC; the probe reads only the first + ResultScanner scanner = null; + try { + stopWatch.start(); + scanner = table.getScanner(scan); + scanner.next(); + stopWatch.stop(); + sink.publishReadTiming(region, column, stopWatch.getTime()); + } catch (Exception e) { + sink.publishReadFailure(region, column, e); + } finally { + if (scanner != null) scanner.close(); + } + } else { + Get get = new Get(startKey); + get.addFamily(column.getName()); + try { + stopWatch.start(); + table.get(get); + stopWatch.stop(); + sink.publishReadTiming(region, column, stopWatch.getTime()); + } catch (Exception e) { + sink.publishReadFailure(region, column, e); + } } } } @@ -250,4 +296,3 @@ System.exit(exitCode); } } - Index: hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java =================================================================== --- hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java (revision 1503079) +++ hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java 
(working copy) @@ -100,6 +100,7 @@ import org.apache.hadoop.hbase.zookeeper.ZKAssign; import org.apache.hadoop.hbase.zookeeper.ZKConfig; import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher; +import org.apache.hadoop.hbase.tool.Canary; import org.apache.hadoop.hdfs.DFSClient; import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hdfs.MiniDFSCluster; @@ -1652,7 +1653,7 @@ * @param tableName user table to lookup in .META. * @return region server that holds it, null if the row doesn't exist * @throws IOException - * @throws InterruptedException + * @throws InterruptedException */ public HRegionServer getRSForFirstRegionInTable(byte[] tableName) throws IOException, InterruptedException { @@ -2150,6 +2151,16 @@ System.currentTimeMillis() - remainder < timeoutMillis); Thread.sleep(200); } + // Finally make sure all regions are fully open and online out on the cluster. Regions may be + // in the .META. table and almost open on all regionservers, but setting the region online + // in the regionserver is the very last thing done and can take a little while to happen. + // Below we do a get. The get will retry on a NotServingRegionException or a + // RegionOpeningException. It is crass but when done all will be online. + try { + Canary.sniff(getHBaseAdmin(), Bytes.toString(table)); + } catch (Exception e) { + throw new IOException(e); + } } /**