Index: hbase-server/src/main/java/org/apache/hadoop/hbase/tool/Canary.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/tool/Canary.java (revision 1553226) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/tool/Canary.java (working copy) @@ -66,7 +66,8 @@ public interface Sink { public void publishReadFailure(HRegionInfo region, Exception e); public void publishReadFailure(HRegionInfo region, HColumnDescriptor column, Exception e); - public void publishReadTiming(HRegionInfo region, HColumnDescriptor column, long msTime); + public void publishInfo(HRegionInfo region, ServerName serverName); + public void publishInfoTimeout(HRegionInfo region, ServerName serverName); } // new extended sink for output regionserver mode info // do not change the Sink interface directly due to maintaining the API @@ -80,19 +81,25 @@ public static class StdOutSink implements Sink { @Override public void publishReadFailure(HRegionInfo region, Exception e) { - LOG.error(String.format("read from region %s failed", region.getRegionNameAsString()), e); + LOG.error(String.format("Read from Region %s failed", region.getRegionNameAsString()), e); } @Override public void publishReadFailure(HRegionInfo region, HColumnDescriptor column, Exception e) { - LOG.error(String.format("read from region %s column family %s failed", + LOG.error(String.format("Read from Region %s Column Family %s Failed", region.getRegionNameAsString(), column.getNameAsString()), e); } @Override - public void publishReadTiming(HRegionInfo region, HColumnDescriptor column, long msTime) { - LOG.info(String.format("read from region %s column family %s in %dms", - region.getRegionNameAsString(), column.getNameAsString(), msTime)); + public void publishInfo(HRegionInfo region, ServerName serverName) { + LOG.info(String.format("Read from Region: %s at ServerName: %s", + region.getRegionNameAsString(), serverName.getServerName())); + } + + @Override 
+ public void publishInfoTimeout(HRegionInfo region, ServerName serverName) { + LOG.info(String.format("Timeout from Region: %s at ServerName: %s", + region.getRegionNameAsString(), serverName.getServerName())); } } // a ExtendedSink implementation @@ -126,9 +133,11 @@ private Sink sink = null; private boolean useRegExp; - private long timeout = DEFAULT_TIMEOUT; - private boolean failOnError = true; + private static long timeout = DEFAULT_TIMEOUT; + private static boolean failOnError = true; private boolean regionServerMode = false; + private static long startTimeCanary; + private static int errorCode; public Canary() { this(new RegionServerStdOutSink()); @@ -197,7 +206,7 @@ } try { - this.timeout = Long.parseLong(args[i]); + timeout = Long.parseLong(args[i]); } catch (NumberFormatException e) { System.err.println("-t needs a numeric value argument."); printUsageAndExit(); @@ -227,14 +236,11 @@ // start to prepare the stuffs Monitor monitor = null; Thread monitorThread = null; - long startTime = 0; - long currentTimeLength = 0; do { // do monitor !! 
monitor = this.newMonitor(index, args); monitorThread = new Thread(monitor); - startTime = System.currentTimeMillis(); monitorThread.start(); while (!monitor.isDone()) { // wait for 1 sec @@ -242,27 +248,21 @@ // exit if any error occurs if (this.failOnError && monitor.hasError()) { monitorThread.interrupt(); - System.exit(monitor.errorCode); - } - currentTimeLength = System.currentTimeMillis() - startTime; - if (currentTimeLength > this.timeout) { - LOG.error("The monitor is running too long (" + currentTimeLength - + ") after timeout limit:" + this.timeout - + " will be killed itself !!"); - monitor.errorCode = TIMEOUT_ERROR_EXIT_CODE; - break; + System.exit(errorCode); } } if (this.failOnError && monitor.hasError()) { monitorThread.interrupt(); - System.exit(monitor.errorCode); + System.exit(errorCode); } Thread.sleep(interval); } while (interval > 0); - return(monitor.errorCode); + LOG.info("Canary finished"); + + return(errorCode); } private void printUsageAndExit() { @@ -321,7 +321,6 @@ protected boolean useRegExp; protected boolean done = false; - protected int errorCode = 0; protected Sink sink; public boolean isDone() { @@ -351,11 +350,11 @@ this.admin = new HBaseAdmin(config); } catch (Exception e) { LOG.error("Initial HBaseAdmin failed...", e); - this.errorCode = INIT_ERROR_EXIT_CODE; + errorCode = INIT_ERROR_EXIT_CODE; } } else if (admin.isAborted()) { LOG.error("HBaseAdmin aborted"); - this.errorCode = INIT_ERROR_EXIT_CODE; + errorCode = INIT_ERROR_EXIT_CODE; } return !this.hasError(); } @@ -371,6 +370,7 @@ @Override public void run() { + startTimeCanary = System.currentTimeMillis(); if(this.initAdmin()) { try { if (this.targets != null && this.targets.length > 0) { @@ -383,7 +383,7 @@ } } catch (Exception e) { LOG.error("Run regionMonitor failed", e); - this.errorCode = ERROR_EXIT_CODE; + errorCode = ERROR_EXIT_CODE; } } this.done = true; @@ -417,7 +417,7 @@ String msg = "No any HTable found, tablePattern:" + Arrays.toString(monitorTargets); 
LOG.error(msg); - this.errorCode = INIT_ERROR_EXIT_CODE; + errorCode = INIT_ERROR_EXIT_CODE; throw new TableNotFoundException(msg); } } else { @@ -497,7 +497,12 @@ Scan scan = null; ResultScanner rs = null; StopWatch stopWatch = new StopWatch(); + ServerName serverName = table.getRegionLocations().get(region); for (HColumnDescriptor column : tableDesc.getColumnFamilies()) { + long elapsedTime = System.currentTimeMillis() - startTimeCanary; + checkTimeout(sink, region, serverName, elapsedTime); + sink.publishInfo(region, serverName); + stopWatch.reset(); startKey = region.getStartKey(); // Can't do a get on empty start row so do a Scan of first element if any instead. @@ -516,12 +521,10 @@ stopWatch.start(); table.get(get); stopWatch.stop(); - sink.publishReadTiming(region, column, stopWatch.getTime()); } else { stopWatch.start(); rs = table.getScanner(scan); stopWatch.stop(); - sink.publishReadTiming(region, column, stopWatch.getTime()); } } catch (Exception e) { sink.publishReadFailure(region, column, e); @@ -535,6 +538,25 @@ } } } + + /** + * Check timeout and set TIMEOUT_ERROR_EXIT_CODE + * default timeout is 60000 milliseconds. + * @param sink Sink object to log message. + * @param region Region where the timeout occurred. + * @param serverName ServerName where the timeout occurred. + * @param elapsedTime Used to compare with timeout value. 
+ */ + private static void checkTimeout(final Sink sink, HRegionInfo region, ServerName serverName, + long elapsedTime) { + if (elapsedTime >= timeout) { + sink.publishInfoTimeout(region, serverName); + errorCode = TIMEOUT_ERROR_EXIT_CODE; + if (failOnError) { + System.exit(errorCode); + } + } + } //a monitor for regionserver mode private static class RegionServerMonitor extends Monitor { @@ -549,6 +571,7 @@ @Override public void run() { + startTimeCanary = System.currentTimeMillis(); if (this.initAdmin() && this.checkNoTableNames()) { Map> rsAndRMap = this.filterRegionServerByName(); this.monitorRegionServers(rsAndRMap); @@ -564,7 +587,7 @@ tableNames = this.admin.listTableNames(); } catch (IOException e) { LOG.error("Get listTableNames failed", e); - this.errorCode = INIT_ERROR_EXIT_CODE; + errorCode = INIT_ERROR_EXIT_CODE; return false; } @@ -581,7 +604,7 @@ if (foundTableNames.size() > 0) { System.err.println("Cannot pass a tablename when using the -regionserver " + "option, tablenames:" + foundTableNames.toString()); - this.errorCode = USAGE_EXIT_CODE; + errorCode = USAGE_EXIT_CODE; } return foundTableNames.size() == 0; } @@ -604,6 +627,10 @@ try { tableName = region.getTable().getNameAsString(); table = new HTable(this.admin.getConfiguration(), tableName); + long elapsedTime = System.currentTimeMillis() - startTimeCanary; + ServerName serverNameInstance = table.getRegionLocations().get(region); + checkTimeout(sink, region, serverNameInstance, elapsedTime); + sink.publishInfo(region, serverNameInstance); startKey = region.getStartKey(); // Can't do a get on empty start row so do a Scan of first element if any instead. 
if(startKey.length > 0) { @@ -619,7 +646,6 @@ table.getScanner(scan); stopWatch.stop(); } - this.getSink().publishReadTiming(tableName, serverName, stopWatch.getTime()); } catch (TableNotFoundException tnfe) { // This is ignored because it doesn't imply that the regionserver is dead } catch (TableNotEnabledException tnee) { @@ -631,7 +657,7 @@ } catch (IOException e) { this.getSink().publishReadFailure(tableName, serverName); LOG.error(e); - this.errorCode = ERROR_EXIT_CODE; + errorCode = ERROR_EXIT_CODE; } finally { if (table != null) { try { @@ -681,7 +707,7 @@ } catch (IOException e) { String msg = "Get HTables info failed"; LOG.error(msg, e); - this.errorCode = INIT_ERROR_EXIT_CODE; + errorCode = INIT_ERROR_EXIT_CODE; } finally { if (table != null) { try { @@ -718,14 +744,14 @@ } if (!regExpFound) { LOG.error("No any RegionServerInfo found, regionServerPattern:" + rsName); - this.errorCode = INIT_ERROR_EXIT_CODE; + errorCode = INIT_ERROR_EXIT_CODE; } } else { if (fullRsAndRMap.containsKey(rsName)) { filteredRsAndRMap.put(rsName, fullRsAndRMap.get(rsName)); } else { LOG.error("No any RegionServerInfo found, regionServerName:" + rsName); - this.errorCode = INIT_ERROR_EXIT_CODE; + errorCode = INIT_ERROR_EXIT_CODE; } } }