diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/Canary.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/Canary.java index bbf858d..6f109e4 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/Canary.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/Canary.java @@ -39,6 +39,7 @@ import java.util.Random; import java.util.Set; import java.util.TreeSet; import java.util.concurrent.Callable; +import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Future; @@ -115,11 +116,15 @@ public final class Canary implements Tool { public long incReadFailureCount(); public void publishReadFailure(HRegionInfo region, Exception e); public void publishReadFailure(HRegionInfo region, HColumnDescriptor column, Exception e); + public void updateReadFailedHostList(HRegionInfo region, String serverName); + public Map getReadFailures(); public void publishReadTiming(HRegionInfo region, HColumnDescriptor column, long msTime); public long getWriteFailureCount(); public void publishWriteFailure(HRegionInfo region, Exception e); public void publishWriteFailure(HRegionInfo region, HColumnDescriptor column, Exception e); public void publishWriteTiming(HRegionInfo region, HColumnDescriptor column, long msTime); + public void updateWriteFailedHostList(HRegionInfo region, String serverName); + public Map getWriteFailures(); } // new extended sink for output regionserver mode info // do not change the Sink interface directly due to maintaining the API @@ -134,6 +139,9 @@ public final class Canary implements Tool { private AtomicLong readFailureCount = new AtomicLong(0), writeFailureCount = new AtomicLong(0); + private Map readFailures = new ConcurrentHashMap(); + private Map writeFailures = new ConcurrentHashMap(); + @Override public long getReadFailureCount() { return readFailureCount.get(); @@ -158,9 +166,26 @@ public final class Canary implements Tool { } @Override + public void updateReadFailedHostList(HRegionInfo region, String serverName) { + LOG.error(String.format("Updating Read Failure List from region:%s on region server:%s", + region.getRegionNameAsString(), serverName)); + readFailures.put(region.getRegionNameAsString(), serverName); + } + + @Override public void publishReadTiming(HRegionInfo region, HColumnDescriptor column, long msTime) { LOG.info(String.format("read from region %s column family %s in %dms", - region.getRegionNameAsString(), column.getNameAsString(), msTime)); + region.getRegionNameAsString(), column.getNameAsString(), msTime)); + } + + @Override + public Map getReadFailures() { + return readFailures; + } + + @Override + public Map getWriteFailures() { + return writeFailures; } @Override @@ -186,6 +211,14 @@ public final class Canary implements Tool { LOG.info(String.format("write to region %s column family %s in %dms", region.getRegionNameAsString(), column.getNameAsString(), msTime)); } + + @Override + public void updateWriteFailedHostList(HRegionInfo region, String serverName) { + LOG.error(String.format("Updating Write Failure List from region:%s on region server:%s", + region.getRegionNameAsString(), serverName)); + writeFailures.put(region.getRegionNameAsString(), serverName); + } + } // a ExtendedSink implementation public static class RegionServerStdOutSink extends StdOutSink implements ExtendedSink { @@ -265,11 +298,13 @@ public final class Canary implements Tool { private Sink sink; private TaskType taskType; private boolean rawScanEnabled; + private ServerName serverName; - RegionTask(Connection connection, HRegionInfo region, Sink sink, TaskType taskType, - boolean rawScanEnabled) { + RegionTask(Connection connection, HRegionInfo region, ServerName serverName, Sink sink, + TaskType taskType, boolean rawScanEnabled) { this.connection = connection; this.region = region; + this.serverName = serverName; this.sink = sink; this.taskType = taskType; this.rawScanEnabled = rawScanEnabled; @@ -356,6 +391,7 @@ public final class Canary implements Tool { sink.publishReadTiming(region, column, stopWatch.getTime()); } catch (Exception e) { sink.publishReadFailure(region, column, e); + sink.updateReadFailedHostList(region, serverName.getHostname()); } finally { if (rs != null) { rs.close(); @@ -412,6 +448,7 @@ public final class Canary implements Tool { table.close(); } catch (IOException e) { sink.publishWriteFailure(region, e); + sink.updateWriteFailedHostList(region, serverName.getHostname()); } return null; } @@ -733,6 +770,14 @@ public final class Canary implements Tool { return monitor.errorCode; } + public Map getReadFailures() { + return sink.getReadFailures(); + } + + public Map getWriteFailures() { + return sink.getWriteFailures(); + } + private void printUsageAndExit() { System.err.printf( "Usage: bin/hbase %s [opts] [table1 [table2]...] | [regionserver1 [regionserver2]..]%n", @@ -940,6 +985,24 @@ public final class Canary implements Tool { LOG.error("Sniff region failed!", e); } } + + Map readFailures = sink.getReadFailures(); + if(readFailures != null && readFailures.size() > 0){ + LOG.info("==== Read Canary Failure Summary ==="); + LOG.info("Region \t Server Name"); + for(Map.Entry e : readFailures.entrySet()) { + LOG.error(e.getKey() + "\t" + e.getValue()); + } + } + + Map writeFailures = sink.getWriteFailures(); + if(writeFailures != null && writeFailures.size() > 0){ + LOG.info("==== Write Canary Failure Summary ==="); + LOG.info("Region \t Server Name"); + for(Map.Entry e : writeFailures.entrySet()) { + LOG.error(e.getKey() + "\t" + e.getValue()); + } + } } catch (Exception e) { LOG.error("Run regionMonitor failed", e); this.errorCode = ERROR_EXIT_CODE; @@ -1137,22 +1200,18 @@ public final class Canary implements Tool { LOG.debug(String.format("reading list of regions for table %s", tableDesc.getTableName())); } - Table table = null; - try { - table = admin.getConnection().getTable(tableDesc.getTableName()); - } catch (TableNotFoundException e) { - return new ArrayList>(); - } List tasks = new ArrayList(); + RegionLocator regionLocator = null; try { - List hris = admin.getTableRegions(tableDesc.getTableName()); - if (hris != null) { - for (HRegionInfo region : hris) { - tasks.add(new RegionTask(admin.getConnection(), region, sink, taskType, rawScanEnabled)); - } + regionLocator = admin.getConnection().getRegionLocator(tableDesc.getTableName()); + for (HRegionLocation location : regionLocator.getAllRegionLocations()) { + ServerName rs = location.getServerName(); + HRegionInfo region = location.getRegionInfo(); + tasks.add(new RegionTask(admin.getConnection(), region, rs, sink, taskType, rawScanEnabled)); } + } finally { - table.close(); + regionLocator.close(); } return executor.invokeAll(tasks); } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestCanaryTool.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestCanaryTool.java index fb33d7e..dc176c3 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestCanaryTool.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestCanaryTool.java @@ -53,6 +53,7 @@ import static org.mockito.Mockito.spy; import static org.mockito.Mockito.verify; import static org.mockito.Matchers.argThat; import static org.mockito.Mockito.never; +import static org.junit.Assert.assertEquals; @RunWith(MockitoJUnitRunner.class) @Category({MediumTests.class}) @@ -110,10 +111,11 @@ public class TestCanaryTool { ExecutorService executor = new ScheduledThreadPoolExecutor(1); Canary.RegionServerStdOutSink sink = spy(new Canary.RegionServerStdOutSink()); Canary canary = new Canary(executor, sink); - String[] args = { "-t", "10000", "testTable" }; + String[] args = { "-writeSniffing", "-t", "10000", "testTable" }; ToolRunner.run(testingUtility.getConfiguration(), canary, args); - verify(sink, atLeastOnce()) - .publishReadTiming(isA(HRegionInfo.class), isA(HColumnDescriptor.class), anyLong()); + assertEquals("verify no read error count", 0, canary.getReadFailures().size()); + assertEquals("verify no write error count", 0, canary.getWriteFailures().size()); + verify(sink, atLeastOnce()).publishReadTiming(isA(HRegionInfo.class), isA(HColumnDescriptor.class), anyLong()); } //no table created, so there should be no regions @@ -146,6 +148,7 @@ public class TestCanaryTool { public void testRawScanConfig() throws Exception { TableName tableName = TableName.valueOf("testTableRawScan"); Table table = testingUtility.createTable(tableName, new byte[][] { FAMILY }); + // insert some test rows for (int i=0; i<1000; i++) { byte[] iBytes = Bytes.toBytes(i); @@ -162,13 +165,15 @@ public class TestCanaryTool { ToolRunner.run(conf, canary, args); verify(sink, atLeastOnce()) .publishReadTiming(isA(HRegionInfo.class), isA(HColumnDescriptor.class), anyLong()); + assertEquals("verify no read error count", 0, canary.getReadFailures().size()); } - + private void runRegionserverCanary() throws Exception { ExecutorService executor = new ScheduledThreadPoolExecutor(1); Canary canary = new Canary(executor, new Canary.RegionServerStdOutSink()); String[] args = { "-t", "10000", "-regionserver"}; ToolRunner.run(testingUtility.getConfiguration(), canary, args); + assertEquals("verify no read error count", 0, canary.getReadFailures().size()); } }