commit 14309e2471363b159b103bcb80a02fa42456c0d4 Author: nileema Date: 28 minutes ago add metrics to hmaster diff --git a/src/main/java/org/apache/hadoop/hbase/Server.java b/src/main/java/org/apache/hadoop/hbase/Server.java index de19e2c..0b7cebd 100644 --- a/src/main/java/org/apache/hadoop/hbase/Server.java +++ b/src/main/java/org/apache/hadoop/hbase/Server.java @@ -47,4 +47,4 @@ public interface Server extends Abortable, Stoppable { * @return The unique server name for this server. */ public ServerName getServerName(); -} \ No newline at end of file +} diff --git a/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java b/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java index 9c60c71..1d89bfc 100644 --- a/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java +++ b/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java @@ -1227,6 +1227,7 @@ public class AssignmentManager extends ZooKeeperListener { plan.getDestination()); // Send OPEN RPC. This can fail if the server on other end is is not up. 
serverManager.sendRegionOpen(plan.getDestination(), state.getRegion()); + this.master.getMetrics().incRegionsOpened(); break; } catch (Throwable t) { LOG.warn("Failed assignment of " + diff --git a/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index 64c45fc..57541a7 100644 --- a/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -54,8 +54,8 @@ import org.apache.hadoop.hbase.catalog.MetaEditor; import org.apache.hadoop.hbase.catalog.MetaReader; import org.apache.hadoop.hbase.client.Get; import org.apache.hadoop.hbase.client.MetaScanner; -import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor; +import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.executor.ExecutorService; import org.apache.hadoop.hbase.executor.ExecutorService.ExecutorType; import org.apache.hadoop.hbase.ipc.HBaseRPC; @@ -624,6 +624,12 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server { public ZooKeeperWatcher getZooKeeperWatcher() { return this.zooKeeper; } + /** + * @return Server metrics + */ + public MasterMetrics getMetrics() { + return this.metrics; + } /* * Start up all services. 
If any of these threads gets an unhandled exception diff --git a/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java b/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java index 2f86f04..f4abaec 100644 --- a/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java +++ b/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java @@ -30,6 +30,7 @@ import java.util.concurrent.locks.ReentrantLock; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.ContentSummary; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -225,7 +226,7 @@ public class MasterFileSystem { } public void splitLog(final List serverNames) { - long splitTime = 0, splitLogSize = 0; + long splitTime = 0, splitLogSize = 0, splitCount = 0; List logDirs = new ArrayList(); for(ServerName serverName: serverNames){ Path logDir = new Path(this.rootdir, HLog.getHLogDirectoryName(serverName.toString())); @@ -265,6 +266,7 @@ public class MasterFileSystem { oldLogDir, this.fs); splitter.splitLog(); } + splitCount = splitter.getSplitCount(); splitTime = splitter.getTime(); splitLogSize = splitter.getSize(); } catch (IOException e) { @@ -276,7 +278,7 @@ public class MasterFileSystem { } if (this.metrics != null) { - this.metrics.addSplit(splitTime, splitLogSize); + this.metrics.addSplit(splitTime, splitCount, splitLogSize); } } @@ -478,4 +480,4 @@ public class MasterFileSystem { this.services.getTableDescriptors().add(htd); return htd; } -} \ No newline at end of file +} diff --git a/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java b/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java index c395b5b..6177a48 100644 --- a/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java +++ b/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java @@ -273,7 +273,7 @@ 
public class ServerManager { } /** @return the count of active regionservers */ - int countOfRegionServers() { + public int countOfRegionServers() { // Presumes onlineServers is a concurrent map return this.onlineServers.size(); } @@ -380,6 +380,7 @@ public class ServerManager { LOG.debug("Added=" + serverName + " to dead servers, submitted shutdown handler to be executed, root=" + carryingRoot + ", meta=" + carryingMeta); + this.master.getMetrics().incRegionServerExpired(); } // RPC methods to region servers diff --git a/src/main/java/org/apache/hadoop/hbase/master/metrics/MasterMetrics.java b/src/main/java/org/apache/hadoop/hbase/master/metrics/MasterMetrics.java index 9e4cf73..f3ad608 100644 --- a/src/main/java/org/apache/hadoop/hbase/master/metrics/MasterMetrics.java +++ b/src/main/java/org/apache/hadoop/hbase/master/metrics/MasterMetrics.java @@ -21,15 +21,16 @@ import java.io.IOException; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hbase.master.ServerManager; import org.apache.hadoop.hbase.metrics.HBaseInfo; import org.apache.hadoop.hbase.metrics.MetricsRate; -import org.apache.hadoop.hbase.metrics.PersistentMetricsTimeVaryingRate; import org.apache.hadoop.metrics.ContextFactory; import org.apache.hadoop.metrics.MetricsContext; import org.apache.hadoop.metrics.MetricsRecord; import org.apache.hadoop.metrics.MetricsUtil; import org.apache.hadoop.metrics.Updater; import org.apache.hadoop.metrics.jvm.JvmMetrics; +import org.apache.hadoop.metrics.util.MetricsIntValue; import org.apache.hadoop.metrics.util.MetricsLongValue; import org.apache.hadoop.metrics.util.MetricsRegistry; @@ -57,12 +58,39 @@ public class MasterMetrics implements Updater { new MetricsRate("cluster_requests", registry); /** Time it takes to finish HLog.splitLog() */ - final PersistentMetricsTimeVaryingRate splitTime = - new PersistentMetricsTimeVaryingRate("splitTime", registry); + final MetricsLongValue splitTime = + new 
MetricsLongValue("splitTime", registry); - /** Size of HLog files being split */ - final PersistentMetricsTimeVaryingRate splitSize = - new PersistentMetricsTimeVaryingRate("splitSize", registry); + + /* Number of active region servers. This number is updated + * every time a regionserver joins or leaves. + */ + public MetricsIntValue numRegionServers = + new MetricsIntValue("numRegionServers", registry); + + /* This is the number of dead region servers. + * This is cumulative across all intervals from startup time. + */ + public MetricsIntValue numRSExpired = + new MetricsIntValue("numRSExpired", registry); + + /** Metrics to keep track of the number and size of logs split. + * This is cumulative across all intervals from startup time. + */ + public MetricsLongValue numLogsSplit = + new MetricsLongValue("numLogsSplit", registry); + + private MetricsLongValue sizeOfLogsSplit = + new MetricsLongValue("sizeOfLogsSplit", registry); + + /** Track the number of regions opened. Useful for identifying + * open/close of regions due to load balancing. + * This is a cumulative metric. + */ + private MetricsIntValue numRegionsOpened = + new MetricsIntValue("numRegionsOpened", registry); + + private ServerManager serverManager; public MasterMetrics(final String name) { MetricsContext context = MetricsUtil.getContext("hbase"); @@ -89,6 +117,11 @@ public class MasterMetrics implements Updater { LOG.info("Initialized"); } + public MasterMetrics(final String name, ServerManager serverMgr) { + this(name); + serverManager = serverMgr; + } + public void shutdown() { if (masterStatistics != null) masterStatistics.shutdown(); @@ -102,19 +135,21 @@ public class MasterMetrics implements Updater { public void doUpdates(MetricsContext unused) { synchronized (this) { this.lastUpdate = System.currentTimeMillis(); - + this.numRegionServers.set(this.serverManager.countOfRegionServers()); + // has the extended period for long-living stats elapsed? 
if (this.extendedPeriod > 0 && this.lastUpdate - this.lastExtUpdate >= this.extendedPeriod) { this.lastExtUpdate = this.lastUpdate; - this.splitTime.resetMinMaxAvg(); - this.splitSize.resetMinMaxAvg(); this.resetAllMinMax(); } - + numRSExpired.set(this.serverManager.getDeadServers().size()); this.cluster_requests.pushMetric(metricsRecord); this.splitTime.pushMetric(metricsRecord); - this.splitSize.pushMetric(metricsRecord); + this.numRSExpired.pushMetric(metricsRecord); + this.numLogsSplit.pushMetric(metricsRecord); + this.sizeOfLogsSplit.pushMetric(metricsRecord); + this.numRegionsOpened.pushMetric(metricsRecord); } this.metricsRecord.update(); } @@ -127,12 +162,13 @@ public class MasterMetrics implements Updater { * Record a single instance of a split * @param time time that the split took * @param size length of original HLogs that were split + * @param splitSize size of the logs that were split */ - public synchronized void addSplit(long time, long size) { - splitTime.inc(time); - splitSize.inc(size); + public synchronized void addSplit(long time, long splitCount, long splitSize) { + splitTime.set(splitTime.get() + time); + numLogsSplit.set(numLogsSplit.get() + splitCount); + sizeOfLogsSplit.set(sizeOfLogsSplit.get() + splitSize); } - /** * @return Count of requests. 
*/ @@ -146,4 +182,12 @@ public class MasterMetrics implements Updater { public void incrementRequests(final int inc) { this.cluster_requests.inc(inc); } + + public synchronized void incRegionsOpened() { + numRegionsOpened.set(numRegionsOpened.get() + 1); + } + + public synchronized void incRegionServerExpired() { + numRSExpired.set(numRSExpired.get() + 1); + } } diff --git a/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLogSplitter.java b/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLogSplitter.java index 2c7571c..64947d8 100644 --- a/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLogSplitter.java +++ b/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLogSplitter.java @@ -84,6 +84,7 @@ public class HLogSplitter { private boolean hasSplit = false; private long splitTime = 0; private long splitSize = 0; + private int splitCount = 0; // Parameters for split process @@ -220,6 +221,13 @@ public class HLogSplitter { } /** + * @return number of files split + */ + public long getSplitCount() { + return this.splitCount; + } + + /** * @return aggregate size of hlogs that were split */ public long getSize() { @@ -266,7 +274,8 @@ public class HLogSplitter { long totalBytesToSplit = countTotalBytes(logfiles); splitSize = 0; - + splitCount = 0; + outputSink.startWriterThreads(entryBuffers); try { @@ -275,6 +284,7 @@ public class HLogSplitter { Path logPath = log.getPath(); long logLength = log.getLen(); splitSize += logLength; + splitCount++; logAndReport("Splitting hlog " + (i++ + 1) + " of " + logfiles.length + ": " + logPath + ", length=" + logLength); Reader in;