Index: hbase-server/src/main/java/org/apache/hadoop/hbase/tool/Canary.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/tool/Canary.java (revision 1554570) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/tool/Canary.java (working copy) @@ -127,9 +127,11 @@ private Sink sink = null; private boolean useRegExp; - private long timeout = DEFAULT_TIMEOUT; - private boolean failOnError = true; + private static long timeout = DEFAULT_TIMEOUT; + private static boolean failOnError = true; private boolean regionServerMode = false; + private static long startTimeCanary; + private static volatile int errorCode; public Canary() { this(new RegionServerStdOutSink()); @@ -198,7 +200,7 @@ } try { - this.timeout = Long.parseLong(args[i]); + timeout = Long.parseLong(args[i]); } catch (NumberFormatException e) { System.err.println("-t needs a numeric value argument."); printUsageAndExit(); @@ -213,7 +215,7 @@ printUsageAndExit(); } - this.failOnError = Boolean.parseBoolean(args[i]); + failOnError = Boolean.parseBoolean(args[i]); } else { // no options match System.err.println(cmd + " options is invalid."); @@ -228,42 +230,32 @@ // start to prepare the stuffs Monitor monitor = null; Thread monitorThread = null; - long startTime = 0; - long currentTimeLength = 0; do { // do monitor !! 
monitor = this.newMonitor(index, args); monitorThread = new Thread(monitor); - startTime = System.currentTimeMillis(); monitorThread.start(); while (!monitor.isDone()) { // wait for 1 sec Thread.sleep(1000); // exit if any error occurs - if (this.failOnError && monitor.hasError()) { + if (failOnError && monitor.hasError()) { monitorThread.interrupt(); - System.exit(monitor.errorCode); - } - currentTimeLength = System.currentTimeMillis() - startTime; - if (currentTimeLength > this.timeout) { - LOG.error("The monitor is running too long (" + currentTimeLength - + ") after timeout limit:" + this.timeout - + " will be killed itself !!"); - monitor.errorCode = TIMEOUT_ERROR_EXIT_CODE; - break; + System.exit(errorCode); } } - if (this.failOnError && monitor.hasError()) { + if (failOnError && monitor.hasError()) { monitorThread.interrupt(); - System.exit(monitor.errorCode); + System.exit(errorCode); } Thread.sleep(interval); } while (interval > 0); - return(monitor.errorCode); + LOG.info("Canary finished"); + return(errorCode); } private void printUsageAndExit() { @@ -322,7 +314,6 @@ protected boolean useRegExp; protected boolean done = false; - protected int errorCode = 0; protected Sink sink; public boolean isDone() { @@ -352,11 +343,11 @@ this.admin = new HBaseAdmin(config); } catch (Exception e) { LOG.error("Initial HBaseAdmin failed...", e); - this.errorCode = INIT_ERROR_EXIT_CODE; + errorCode = INIT_ERROR_EXIT_CODE; } } else if (admin.isAborted()) { LOG.error("HBaseAdmin aborted"); - this.errorCode = INIT_ERROR_EXIT_CODE; + errorCode = INIT_ERROR_EXIT_CODE; } return !this.hasError(); } @@ -372,6 +363,7 @@ @Override public void run() { + startTimeCanary = System.currentTimeMillis(); if(this.initAdmin()) { try { if (this.targets != null && this.targets.length > 0) { @@ -384,7 +376,7 @@ } } catch (Exception e) { LOG.error("Run regionMonitor failed", e); - this.errorCode = ERROR_EXIT_CODE; + errorCode = ERROR_EXIT_CODE; } } this.done = true; @@ -418,7 +410,7 @@ String 
msg = "No any HTable found, tablePattern:" + Arrays.toString(monitorTargets); LOG.error(msg); - this.errorCode = INIT_ERROR_EXIT_CODE; + errorCode = INIT_ERROR_EXIT_CODE; throw new TableNotFoundException(msg); } } else { @@ -502,7 +494,12 @@ Scan scan = null; ResultScanner rs = null; StopWatch stopWatch = new StopWatch(); + ServerName serverName = table.getRegionLocations().get(region); for (HColumnDescriptor column : tableDesc.getColumnFamilies()) { + long elapsedTime = System.currentTimeMillis() - startTimeCanary; + checkTimeout(region, serverName, elapsedTime); + LOG.info("Sniffing Region: " + region.getRegionNameAsString() + + " - ServerName: " + serverName.toString()); stopWatch.reset(); startKey = region.getStartKey(); // Can't do a get on empty start row so do a Scan of first element if any instead. @@ -540,6 +537,24 @@ } } } + + /** + * Checks the timeout and sets TIMEOUT_ERROR_EXIT_CODE once it has been reached. + * The default timeout is 600000 milliseconds. + * @param region Region where the timeout occurred. + * @param serverName ServerName where the timeout occurred. + * @param elapsedTime Elapsed time in milliseconds, compared against the timeout value. 
+ */ + private static void checkTimeout(HRegionInfo region, ServerName serverName, long elapsedTime) { + if (elapsedTime >= timeout) { + LOG.error("Timeout Region: " + region.getRegionNameAsString() + + " - ServerName: " + serverName); // concatenation tolerates a null serverName + errorCode = TIMEOUT_ERROR_EXIT_CODE; // static field, also read by the monitoring loop + if (failOnError) { + System.exit(errorCode); + } + } + } //a monitor for regionserver mode private static class RegionServerMonitor extends Monitor { @@ -554,6 +569,7 @@ @Override public void run() { + startTimeCanary = System.currentTimeMillis(); if (this.initAdmin() && this.checkNoTableNames()) { Map> rsAndRMap = this.filterRegionServerByName(); this.monitorRegionServers(rsAndRMap); @@ -569,7 +585,7 @@ tableNames = this.admin.listTableNames(); } catch (IOException e) { LOG.error("Get listTableNames failed", e); - this.errorCode = INIT_ERROR_EXIT_CODE; + errorCode = INIT_ERROR_EXIT_CODE; return false; } @@ -586,7 +602,7 @@ if (foundTableNames.size() > 0) { System.err.println("Cannot pass a tablename when using the -regionserver " + "option, tablenames:" + foundTableNames.toString()); - this.errorCode = USAGE_EXIT_CODE; + errorCode = USAGE_EXIT_CODE; } return foundTableNames.size() == 0; } @@ -609,6 +625,11 @@ try { tableName = region.getTable().getNameAsString(); table = new HTable(this.admin.getConfiguration(), tableName); + long elapsedTime = System.currentTimeMillis() - startTimeCanary; + ServerName serverNameObj = table.getRegionLocations().get(region); + checkTimeout(region, serverNameObj, elapsedTime); + LOG.info("Sniffing Region: " + region.getRegionNameAsString() + + " ServerName: " + serverNameObj.getServerName()); startKey = region.getStartKey(); // Can't do a get on empty start row so do a Scan of first element if any instead. 
if(startKey.length > 0) { @@ -636,7 +657,7 @@ } catch (IOException e) { this.getSink().publishReadFailure(tableName, serverName); LOG.error(e); - this.errorCode = ERROR_EXIT_CODE; + errorCode = ERROR_EXIT_CODE; } finally { if (table != null) { try { @@ -686,7 +707,7 @@ } catch (IOException e) { String msg = "Get HTables info failed"; LOG.error(msg, e); - this.errorCode = INIT_ERROR_EXIT_CODE; + errorCode = INIT_ERROR_EXIT_CODE; } finally { if (table != null) { try { @@ -723,14 +744,14 @@ } if (!regExpFound) { LOG.error("No any RegionServerInfo found, regionServerPattern:" + rsName); - this.errorCode = INIT_ERROR_EXIT_CODE; + errorCode = INIT_ERROR_EXIT_CODE; } } else { if (fullRsAndRMap.containsKey(rsName)) { filteredRsAndRMap.put(rsName, fullRsAndRMap.get(rsName)); } else { LOG.error("No any RegionServerInfo found, regionServerName:" + rsName); - this.errorCode = INIT_ERROR_EXIT_CODE; + errorCode = INIT_ERROR_EXIT_CODE; } } }