// Patch summary (Canary.java): adds an opt-in "-treatFailureAsError" mode to the canary tool.
//  - Sink interface gains getReadFailureCount() and getWriteFailureCount().
//  - StdOutSink keeps two long counters (readFailureCount, writeFailureCount), returns them from the
//    new getters, and increments them in both publishReadFailure overloads and both
//    publishWriteFailure overloads. A publishReadFailure(String table, String server) override
//    (enclosing class not visible in its hunk) also increments readFailureCount.
//  - The "-treatFailureAsError" argument sets a new boolean field, is listed in the usage text, and is
//    passed through the RegionServerMonitor / RegionMonitor constructors into Monitor.
//  - Monitor.hasError() still returns true when errorCode != 0; otherwise it now returns true only if
//    treatFailureAsError is set and the sink reports a read or write failure count above zero.
//  - The log text in the InterruptedException handler changes from "Sniff regionserver failed!" to
//    "Sniff regionserver interrupted!".
// NOTE(review): the counters are plain long fields updated with ++. If the publish* methods are
//   invoked from the executor's worker threads, increments can be lost or read stale; an AtomicLong
//   would avoid that - TODO confirm against the callers, which these hunks do not show.
// NOTE(review): the InterruptedException handler sets errorCode and logs; no restore of the thread's
//   interrupt status is visible in this hunk - confirm in the surrounding code.
// NOTE(review): Sink is a public interface and the two new methods are abstract, so any Sink
//   implementation not touched by this patch has to add them to keep compiling.
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/Canary.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/Canary.java index 151be42..bf6149d 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/Canary.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/Canary.java @@ -93,9 +93,11 @@ import org.apache.hadoop.util.ToolRunner; public final class Canary implements Tool { // Sink interface used by the canary to outputs information public interface Sink { + public long getReadFailureCount(); public void publishReadFailure(HRegionInfo region, Exception e); public void publishReadFailure(HRegionInfo region, HColumnDescriptor column, Exception e); public void publishReadTiming(HRegionInfo region, HColumnDescriptor column, long msTime); + public long getWriteFailureCount(); public void publishWriteFailure(HRegionInfo region, Exception e); public void publishWriteFailure(HRegionInfo region, HColumnDescriptor column, Exception e); public void publishWriteTiming(HRegionInfo region, HColumnDescriptor column, long msTime); @@ -110,13 +112,22 @@ public final class Canary implements Tool { // Simple implementation of canary sink that allows to plot on // file or standard output timings or failures. 
public static class StdOutSink implements Sink { + protected long readFailureCount, writeFailureCount; + + @Override + public long getReadFailureCount() { + return readFailureCount; + } + @Override public void publishReadFailure(HRegionInfo region, Exception e) { + readFailureCount++; LOG.error(String.format("read from region %s failed", region.getRegionNameAsString()), e); } @Override public void publishReadFailure(HRegionInfo region, HColumnDescriptor column, Exception e) { + readFailureCount++; LOG.error(String.format("read from region %s column family %s failed", region.getRegionNameAsString(), column.getNameAsString()), e); } @@ -128,12 +139,19 @@ public final class Canary implements Tool { } @Override + public long getWriteFailureCount() { + return writeFailureCount; + } + + @Override public void publishWriteFailure(HRegionInfo region, Exception e) { + writeFailureCount++; LOG.error(String.format("write to region %s failed", region.getRegionNameAsString()), e); } @Override public void publishWriteFailure(HRegionInfo region, HColumnDescriptor column, Exception e) { + writeFailureCount++; LOG.error(String.format("write to region %s column family %s failed", region.getRegionNameAsString(), column.getNameAsString()), e); } @@ -149,6 +167,7 @@ public final class Canary implements Tool { @Override public void publishReadFailure(String table, String server) { + readFailureCount++; LOG.error(String.format("Read from table:%s on region server:%s", table, server)); } @@ -434,6 +453,7 @@ public final class Canary implements Tool { private boolean regionServerMode = false; private boolean regionServerAllRegions = false; private boolean writeSniffing = false; + private boolean treatFailureAsError = false; private TableName writeTableName = DEFAULT_WRITE_TABLE_NAME; private ExecutorService executor; // threads to retrieve data from regionservers @@ -497,6 +517,8 @@ public final class Canary implements Tool { this.regionServerAllRegions = true; } else 
if(cmd.equals("-writeSniffing")) { this.writeSniffing = true; + } else if(cmd.equals("-treatFailureAsError")) { + this.treatFailureAsError = true; } else if (cmd.equals("-e")) { this.useRegExp = true; } else if (cmd.equals("-t")) { @@ -638,6 +660,7 @@ public final class Canary implements Tool { " default is true"); System.err.println(" -t timeout for a check, default is 600000 (milisecs)"); System.err.println(" -writeSniffing enable the write sniffing in canary"); + System.err.println(" -treatFailureAsError treats read / write failure as error"); System.err.println(" -writeTable The table used for write sniffing." + " Default is hbase:canary"); System.exit(USAGE_EXIT_CODE); @@ -663,11 +686,12 @@ public final class Canary implements Tool { if (this.regionServerMode) { monitor = new RegionServerMonitor(connection, monitorTargets, this.useRegExp, - (ExtendedSink) this.sink, this.executor, this.regionServerAllRegions); + (ExtendedSink) this.sink, this.executor, this.regionServerAllRegions, + this.treatFailureAsError); } else { monitor = new RegionMonitor(connection, monitorTargets, this.useRegExp, this.sink, this.executor, - this.writeSniffing, this.writeTableName); + this.writeSniffing, this.writeTableName, this.treatFailureAsError); } return monitor; } @@ -679,6 +703,7 @@ public final class Canary implements Tool { protected Admin admin; protected String[] targets; protected boolean useRegExp; + protected boolean treatFailureAsError; protected boolean initialized = false; protected boolean done = false; @@ -691,7 +716,11 @@ public final class Canary implements Tool { } public boolean hasError() { - return errorCode != 0; + if (errorCode != 0) { + return true; + } + return treatFailureAsError && + (sink.getReadFailureCount() > 0 || sink.getWriteFailureCount() > 0); } @Override @@ -700,12 +729,13 @@ public final class Canary implements Tool { } protected Monitor(Connection connection, String[] monitorTargets, boolean useRegExp, Sink sink, - ExecutorService executor) { 
+ ExecutorService executor, boolean treatFailureAsError) { if (null == connection) throw new IllegalArgumentException("connection shall not be null"); this.connection = connection; this.targets = monitorTargets; this.useRegExp = useRegExp; + this.treatFailureAsError = treatFailureAsError; this.sink = sink; this.executor = executor; } @@ -745,8 +775,9 @@ public final class Canary implements Tool { private int checkPeriod; public RegionMonitor(Connection connection, String[] monitorTargets, boolean useRegExp, - Sink sink, ExecutorService executor, boolean writeSniffing, TableName writeTableName) { - super(connection, monitorTargets, useRegExp, sink, executor); + Sink sink, ExecutorService executor, boolean writeSniffing, TableName writeTableName, + boolean treatFailureAsError) { + super(connection, monitorTargets, useRegExp, sink, executor, treatFailureAsError); Configuration conf = connection.getConfiguration(); this.writeSniffing = writeSniffing; this.writeTableName = writeTableName; @@ -990,8 +1021,9 @@ public final class Canary implements Tool { private boolean allRegions; public RegionServerMonitor(Connection connection, String[] monitorTargets, boolean useRegExp, - ExtendedSink sink, ExecutorService executor, boolean allRegions) { - super(connection, monitorTargets, useRegExp, sink, executor); + ExtendedSink sink, ExecutorService executor, boolean allRegions, + boolean treatFailureAsError) { + super(connection, monitorTargets, useRegExp, sink, executor, treatFailureAsError); this.allRegions = allRegions; } @@ -1086,7 +1118,7 @@ public final class Canary implements Tool { } } catch (InterruptedException e) { this.errorCode = ERROR_EXIT_CODE; - LOG.error("Sniff regionserver failed!", e); + LOG.error("Sniff regionserver interrupted!", e); } }