From 7f284317589a30f496b07192214ca65e603dd61b Mon Sep 17 00:00:00 2001 From: Alan Gates Date: Tue, 1 Aug 2017 09:57:32 -0700 Subject: [PATCH 1/4] Implemented metrics in standalone-metastore. Ported existing metastore classes to use them. --- .../common/metrics/common/MetricsConstant.java | 14 -- .../hive/metastore/TestMetaStoreMetrics.java | 78 ++++---- .../hadoop/hive/metastore/HMSMetricsListener.java | 50 +++-- .../hadoop/hive/metastore/HiveMetaStore.java | 160 +++++++++------- .../apache/hadoop/hive/metastore/ObjectStore.java | 24 ++- .../hadoop/hive/metastore/txn/TxnHandler.java | 26 ++- .../hadoop/hive/metastore/TestObjectStore.java | 25 +-- .../apache/hadoop/hive/metastore/ThreadPool.java | 63 +++++++ .../hive/metastore/metrics/JsonReporter.java | 166 ++++++++++++++++ .../hadoop/hive/metastore/metrics/Metrics.java | 210 +++++++++++++++++++++ .../hive/metastore/metrics/MetricsConstants.java | 46 +++++ .../hadoop/hive/metastore/metrics/TestMetrics.java | 204 ++++++++++++++++++++ 12 files changed, 893 insertions(+), 173 deletions(-) create mode 100644 standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ThreadPool.java create mode 100644 standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/metrics/JsonReporter.java create mode 100644 standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/metrics/Metrics.java create mode 100644 standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/metrics/MetricsConstants.java create mode 100644 standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/metrics/TestMetrics.java diff --git a/common/src/java/org/apache/hadoop/hive/common/metrics/common/MetricsConstant.java b/common/src/java/org/apache/hadoop/hive/common/metrics/common/MetricsConstant.java index b2e78c36587..5d28e2d2c80 100644 --- a/common/src/java/org/apache/hadoop/hive/common/metrics/common/MetricsConstant.java +++ b/common/src/java/org/apache/hadoop/hive/common/metrics/common/MetricsConstant.java @@ -47,20 +47,6 @@ public static final String SQL_OPERATION_PREFIX = "hs2_sql_operation_"; public static final String COMPLETED_SQL_OPERATION_PREFIX = "hs2_completed_sql_operation_"; - public static final String INIT_TOTAL_DATABASES = "init_total_count_dbs"; - public static final String INIT_TOTAL_TABLES = "init_total_count_tables"; - public static final String INIT_TOTAL_PARTITIONS = "init_total_count_partitions"; - - public static final String CREATE_TOTAL_DATABASES = "create_total_count_dbs"; - public static final String CREATE_TOTAL_TABLES = "create_total_count_tables"; - public static final String CREATE_TOTAL_PARTITIONS = "create_total_count_partitions"; - - public static final String DELETE_TOTAL_DATABASES = "delete_total_count_dbs"; - public static final String DELETE_TOTAL_TABLES = "delete_total_count_tables"; - public static final String DELETE_TOTAL_PARTITIONS = "delete_total_count_partitions"; - - public static final String DIRECTSQL_ERRORS = "directsql_errors"; - // The number of Hive operations that are waiting to enter the compile block public static final String WAITING_COMPILE_OPS = "waiting_compile_ops"; diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestMetaStoreMetrics.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestMetaStoreMetrics.java index 9b6cab3207c..0e3da3fd9cc 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestMetaStoreMetrics.java +++ 
b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestMetaStoreMetrics.java @@ -18,14 +18,13 @@ package org.apache.hadoop.hive.metastore; import org.apache.hadoop.hive.cli.CliSessionState; -import org.apache.hadoop.hive.common.metrics.MetricsTestUtils; -import org.apache.hadoop.hive.common.metrics.common.MetricsConstant; -import org.apache.hadoop.hive.common.metrics.common.MetricsFactory; -import org.apache.hadoop.hive.common.metrics.metrics2.CodahaleMetrics; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.metrics.Metrics; +import org.apache.hadoop.hive.metastore.metrics.MetricsConstants; import org.apache.hadoop.hive.metastore.security.HadoopThriftAuthBridge; import org.apache.hadoop.hive.ql.Driver; import org.apache.hadoop.hive.ql.session.SessionState; +import org.junit.Assert; import org.junit.BeforeClass; import org.junit.Test; @@ -38,7 +37,6 @@ private static HiveConf hiveConf; private static Driver driver; - private static CodahaleMetrics metrics; @BeforeClass public static void before() throws Exception { @@ -53,10 +51,6 @@ public static void before() throws Exception { .setVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER, "org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory"); - MetricsFactory.close(); - MetricsFactory.init(hiveConf); - metrics = (CodahaleMetrics) MetricsFactory.getInstance(); - //Increments one HMS connection MetaStoreUtils.startMetaStore(port, HadoopThriftAuthBridge.getBridge(), hiveConf); @@ -69,23 +63,19 @@ public static void before() throws Exception { @Test public void testMethodCounts() throws Exception { driver.run("show databases"); - String json = metrics.dumpJson(); //one call by init, one called here. - MetricsTestUtils.verifyMetricsJson(json, MetricsTestUtils.TIMER, "api_get_all_databases", 2); + Assert.assertEquals(1, Metrics.getRegistry().getTimers().get("api_get_all_databases").getCount()); } @Test public void testMetaDataCounts() throws Exception { - CodahaleMetrics metrics = (CodahaleMetrics) MetricsFactory.getInstance(); - String json = metrics.dumpJson(); - - int initDbCount = (new Integer((MetricsTestUtils.getJsonNode(json, MetricsTestUtils.GAUGE, - MetricsConstant.INIT_TOTAL_DATABASES)).asText())).intValue(); - int initTblCount = (new Integer((MetricsTestUtils.getJsonNode(json, MetricsTestUtils.GAUGE, - MetricsConstant.INIT_TOTAL_TABLES)).asText())).intValue(); - int initPartCount = (new Integer((MetricsTestUtils.getJsonNode(json, MetricsTestUtils.GAUGE, - MetricsConstant.INIT_TOTAL_PARTITIONS)).asText())).intValue(); + int initDbCount = + (Integer)Metrics.getRegistry().getGauges().get(MetricsConstants.TOTAL_DATABASES).getValue(); + int initTblCount = + (Integer)Metrics.getRegistry().getGauges().get(MetricsConstants.TOTAL_TABLES).getValue(); + int initPartCount = + (Integer)Metrics.getRegistry().getGauges().get(MetricsConstants.TOTAL_PARTITIONS).getValue(); //1 databases created driver.run("create database testdb1"); @@ -123,25 +113,22 @@ public void testMetaDataCounts() throws Exception { driver.run("use default"); driver.run("drop database tempdb cascade"); - json = metrics.dumpJson(); - MetricsTestUtils.verifyMetricsJson(json, MetricsTestUtils.COUNTER, MetricsConstant.CREATE_TOTAL_DATABASES, 2); - MetricsTestUtils.verifyMetricsJson(json, MetricsTestUtils.COUNTER, MetricsConstant.CREATE_TOTAL_TABLES, 7); - MetricsTestUtils.verifyMetricsJson(json, MetricsTestUtils.COUNTER, MetricsConstant.CREATE_TOTAL_PARTITIONS, 9); + Assert.assertEquals(2, 
Metrics.getRegistry().getCounters().get(MetricsConstants.CREATE_TOTAL_DATABASES).getCount()); + Assert.assertEquals(7, Metrics.getRegistry().getCounters().get(MetricsConstants.CREATE_TOTAL_TABLES).getCount()); + Assert.assertEquals(9, Metrics.getRegistry().getCounters().get(MetricsConstants.CREATE_TOTAL_PARTITIONS).getCount()); - MetricsTestUtils.verifyMetricsJson(json, MetricsTestUtils.COUNTER, MetricsConstant.DELETE_TOTAL_DATABASES, 1); - MetricsTestUtils.verifyMetricsJson(json, MetricsTestUtils.COUNTER, MetricsConstant.DELETE_TOTAL_TABLES, 3); - MetricsTestUtils.verifyMetricsJson(json, MetricsTestUtils.COUNTER, MetricsConstant.DELETE_TOTAL_PARTITIONS, 3); + Assert.assertEquals(1, Metrics.getRegistry().getCounters().get(MetricsConstants.DELETE_TOTAL_DATABASES).getCount()); + Assert.assertEquals(3, Metrics.getRegistry().getCounters().get(MetricsConstants.DELETE_TOTAL_TABLES).getCount()); + Assert.assertEquals(3, Metrics.getRegistry().getCounters().get(MetricsConstants.DELETE_TOTAL_PARTITIONS).getCount()); //to test initial metadata count metrics. - hiveConf.setVar(HiveConf.ConfVars.METASTORE_RAW_STORE_IMPL, ObjectStore.class.getName()); - HiveMetaStore.HMSHandler baseHandler = new HiveMetaStore.HMSHandler("test", hiveConf, false); - baseHandler.init(); - baseHandler.updateMetrics(); - - json = metrics.dumpJson(); - MetricsTestUtils.verifyMetricsJson(json, MetricsTestUtils.GAUGE, MetricsConstant.INIT_TOTAL_DATABASES, initDbCount + 1); - MetricsTestUtils.verifyMetricsJson(json, MetricsTestUtils.GAUGE, MetricsConstant.INIT_TOTAL_TABLES, initTblCount + 4); - MetricsTestUtils.verifyMetricsJson(json, MetricsTestUtils.GAUGE, MetricsConstant.INIT_TOTAL_PARTITIONS, initPartCount + 6); + Assert.assertEquals(initDbCount + 1, + Metrics.getRegistry().getGauges().get(MetricsConstants.TOTAL_DATABASES).getValue()); + Assert.assertEquals(initTblCount + 4, + Metrics.getRegistry().getGauges().get(MetricsConstants.TOTAL_TABLES).getValue()); + Assert.assertEquals(initPartCount + 6, + Metrics.getRegistry().getGauges().get(MetricsConstants.TOTAL_PARTITIONS).getValue()); + } @@ -149,25 +136,26 @@ public void testMetaDataCounts() throws Exception { public void testConnections() throws Exception { //initial state is one connection - String json = metrics.dumpJson(); - int initialCount = (new Integer((MetricsTestUtils.getJsonNode(json, MetricsTestUtils.COUNTER, - MetricsConstant.OPEN_CONNECTIONS)).asText())).intValue(); + int initialCount = + (Integer)Metrics.getRegistry().getGauges().get(MetricsConstants.OPEN_CONNECTIONS).getValue(); //create two connections HiveMetaStoreClient msc = new HiveMetaStoreClient(hiveConf); HiveMetaStoreClient msc2 = new HiveMetaStoreClient(hiveConf); - json = metrics.dumpJson(); - MetricsTestUtils.verifyMetricsJson(json, MetricsTestUtils.COUNTER, MetricsConstant.OPEN_CONNECTIONS, initialCount + 2); + Assert.assertEquals(initialCount + 2, + Metrics.getRegistry().getGauges().get(MetricsConstants.OPEN_CONNECTIONS).getValue()); //close one connection, verify still two left msc.close(); - json = metrics.dumpJson(); - MetricsTestUtils.verifyMetricsJson(json, MetricsTestUtils.COUNTER, MetricsConstant.OPEN_CONNECTIONS, initialCount + 1); + Thread.sleep(500); // TODO Evil! Need to figure out a way to remove this sleep. 
+ Assert.assertEquals(initialCount + 1, + Metrics.getRegistry().getGauges().get(MetricsConstants.OPEN_CONNECTIONS).getValue()); //close one connection, verify still one left msc2.close(); - json = metrics.dumpJson(); - MetricsTestUtils.verifyMetricsJson(json, MetricsTestUtils.COUNTER, MetricsConstant.OPEN_CONNECTIONS, initialCount); + Thread.sleep(500); // TODO Evil! Need to figure out a way to remove this sleep. + Assert.assertEquals(initialCount, + Metrics.getRegistry().getGauges().get(MetricsConstants.OPEN_CONNECTIONS).getValue()); } } diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/HMSMetricsListener.java b/metastore/src/java/org/apache/hadoop/hive/metastore/HMSMetricsListener.java index 98288a00755..1a5339a02c8 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/HMSMetricsListener.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/HMSMetricsListener.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -17,9 +17,9 @@ */ package org.apache.hadoop.hive.metastore; +import com.codahale.metrics.Counter; +import com.codahale.metrics.MetricRegistry; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.common.metrics.common.Metrics; -import org.apache.hadoop.hive.common.metrics.common.MetricsConstant; import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.events.AddPartitionEvent; import org.apache.hadoop.hive.metastore.events.CreateDatabaseEvent; @@ -27,6 +27,8 @@ import org.apache.hadoop.hive.metastore.events.DropDatabaseEvent; import org.apache.hadoop.hive.metastore.events.DropPartitionEvent; import org.apache.hadoop.hive.metastore.events.DropTableEvent; +import org.apache.hadoop.hive.metastore.metrics.Metrics; +import org.apache.hadoop.hive.metastore.metrics.MetricsConstants; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -35,47 +37,57 @@ */ public class HMSMetricsListener extends MetaStoreEventListener { - public static final Logger LOGGER = LoggerFactory.getLogger(HMSMetricsListener.class); - private Metrics metrics; + private static final Logger LOGGER = LoggerFactory.getLogger(HMSMetricsListener.class); + + private Counter createdDatabases, deletedDatabases, createdTables, deletedTables, createdParts, + deletedParts; - public HMSMetricsListener(Configuration config, Metrics metrics) { + public HMSMetricsListener(Configuration config) { super(config); - this.metrics = metrics; + MetricRegistry registry = Metrics.getRegistry(); + if (registry != null) { + createdDatabases = Metrics.getOrCreateCounter(MetricsConstants.CREATE_TOTAL_DATABASES); + deletedDatabases = Metrics.getOrCreateCounter(MetricsConstants.DELETE_TOTAL_DATABASES); + createdTables = Metrics.getOrCreateCounter(MetricsConstants.CREATE_TOTAL_TABLES); + deletedTables = Metrics.getOrCreateCounter(MetricsConstants.DELETE_TOTAL_TABLES); + createdParts = Metrics.getOrCreateCounter(MetricsConstants.CREATE_TOTAL_PARTITIONS); + deletedParts = Metrics.getOrCreateCounter(MetricsConstants.DELETE_TOTAL_PARTITIONS); + } } @Override public void onCreateDatabase(CreateDatabaseEvent dbEvent) throws MetaException { - incrementCounterInternal(MetricsConstant.CREATE_TOTAL_DATABASES); + HiveMetaStore.HMSHandler.databaseCount.incrementAndGet(); + createdDatabases.inc(); } @Override public void onDropDatabase(DropDatabaseEvent dbEvent) throws MetaException { - 
incrementCounterInternal(MetricsConstant.DELETE_TOTAL_DATABASES); + HiveMetaStore.HMSHandler.databaseCount.decrementAndGet(); + deletedDatabases.inc(); } @Override public void onCreateTable(CreateTableEvent tableEvent) throws MetaException { - incrementCounterInternal(MetricsConstant.CREATE_TOTAL_TABLES); + HiveMetaStore.HMSHandler.tableCount.incrementAndGet(); + createdTables.inc(); } @Override public void onDropTable(DropTableEvent tableEvent) throws MetaException { - incrementCounterInternal(MetricsConstant.DELETE_TOTAL_TABLES); + HiveMetaStore.HMSHandler.tableCount.decrementAndGet(); + deletedTables.inc(); } @Override public void onDropPartition(DropPartitionEvent partitionEvent) throws MetaException { - incrementCounterInternal(MetricsConstant.DELETE_TOTAL_PARTITIONS); + HiveMetaStore.HMSHandler.partCount.decrementAndGet(); + deletedParts.inc(); } @Override public void onAddPartition(AddPartitionEvent partitionEvent) throws MetaException { - incrementCounterInternal(MetricsConstant.CREATE_TOTAL_PARTITIONS); - } - - private void incrementCounterInternal(String name) { - if (metrics != null) { - metrics.incrementCounter(name); - } + HiveMetaStore.HMSHandler.partCount.incrementAndGet(); + createdParts.inc(); } } diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java index df01b2578c4..0a406838bf1 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java @@ -52,6 +52,7 @@ import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.locks.Condition; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantLock; @@ -59,6 +60,9 @@ import javax.jdo.JDOException; +import com.codahale.metrics.Counter; +import com.codahale.metrics.Gauge; +import com.codahale.metrics.MetricRegistry; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableListMultimap; import com.google.common.collect.Lists; @@ -77,10 +81,6 @@ import org.apache.hadoop.hive.common.classification.InterfaceAudience; import org.apache.hadoop.hive.common.classification.InterfaceStability; import org.apache.hadoop.hive.common.cli.CommonCliOptions; -import org.apache.hadoop.hive.common.metrics.common.Metrics; -import org.apache.hadoop.hive.common.metrics.common.MetricsConstant; -import org.apache.hadoop.hive.common.metrics.common.MetricsFactory; -import org.apache.hadoop.hive.common.metrics.common.MetricsVariable; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.io.HdfsUtils; @@ -127,6 +127,8 @@ import org.apache.hadoop.hive.metastore.events.PreReadTableEvent; import org.apache.hadoop.hive.metastore.filemeta.OrcFileMetadataHandler; import org.apache.hadoop.hive.metastore.messaging.EventMessage.EventType; +import org.apache.hadoop.hive.metastore.metrics.Metrics; +import org.apache.hadoop.hive.metastore.metrics.MetricsConstants; import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy; import org.apache.hadoop.hive.metastore.security.HadoopThriftAuthBridge; import org.apache.hadoop.hive.metastore.security.MetastoreDelegationTokenManager; @@ -244,8 +246,9 @@ public TTransport getTransport(TTransport trans) { private FileMetadataManager 
fileMetadataManager; private PartitionExpressionProxy expressionProxy; - //For Metrics - private int initDatabaseCount, initTableCount, initPartCount; + // Variables for metrics + // Package visible so that HMSMetricsListener can see them. + static AtomicInteger databaseCount, tableCount, partCount; private Warehouse wh; // hdfs warehouse private static final ThreadLocal<RawStore> threadLocalMS = @@ -263,6 +266,14 @@ protected TxnStore initialValue() { } }; + private static final ThreadLocal<Map<String, com.codahale.metrics.Timer.Context>> timerContexts = + new ThreadLocal<Map<String, com.codahale.metrics.Timer.Context>>() { + @Override + protected Map<String, com.codahale.metrics.Timer.Context> initialValue() { + return new HashMap<>(); + } + }; + public static RawStore getRawStore() { return threadLocalMS.get(); } @@ -489,39 +500,47 @@ public void init() throws MetaException { //Start Metrics for Embedded mode if (hiveConf.getBoolVar(ConfVars.METASTORE_METRICS)) { - try { - MetricsFactory.init(hiveConf); - } catch (Exception e) { - // log exception, but ignore inability to start - LOG.error("error in Metrics init: " + e.getClass().getName() + " " - + e.getMessage(), e); - } - } - - Metrics metrics = MetricsFactory.getInstance(); - if (metrics != null && hiveConf.getBoolVar(ConfVars.METASTORE_INIT_METADATA_COUNT_ENABLED)) { - LOG.info("Begin calculating metadata count metrics."); - updateMetrics(); - LOG.info("Finished metadata count metrics: " + initDatabaseCount + " databases, " + initTableCount + " tables, " + initPartCount + " partitions."); - metrics.addGauge(MetricsConstant.INIT_TOTAL_DATABASES, new MetricsVariable() { - @Override - public Object getValue() { - return initDatabaseCount; - } - }); - metrics.addGauge(MetricsConstant.INIT_TOTAL_TABLES, new MetricsVariable() { - @Override - public Object getValue() { - return initTableCount; - } - }); - metrics.addGauge(MetricsConstant.INIT_TOTAL_PARTITIONS, new MetricsVariable() { - @Override - public Object getValue() { - return initPartCount + Metrics.initialize(hiveConf); + } + + MetricRegistry registry = Metrics.getRegistry(); + if (registry != null && hiveConf.getBoolVar(ConfVars.METASTORE_INIT_METADATA_COUNT_ENABLED)) { + synchronized (HMSHandler.class) { + if (!registry.getNames().contains(MetricsConstants.TOTAL_DATABASES)) { + LOG.info("Begin calculating metadata count metrics."); + databaseCount = new AtomicInteger(); + tableCount = new AtomicInteger(); + partCount = new AtomicInteger(); + updateMetrics(); + LOG.info( + "Finished metadata count metrics: " + databaseCount + " databases, " + tableCount + + " tables, " + partCount + " partitions."); + registry.register(MetricsConstants.TOTAL_DATABASES, new Gauge<Integer>() { + @Override + public Integer getValue() { + return databaseCount.get(); + } + }); + registry.register(MetricsConstants.TOTAL_TABLES, new Gauge<Integer>() { + @Override + public Integer getValue() { + return tableCount.get(); + } + }); + registry.register(MetricsConstants.TOTAL_PARTITIONS, new Gauge<Integer>() { + @Override + public Integer getValue() { + return partCount.get(); + } + }); + registry.counter(MetricsConstants.CREATE_TOTAL_DATABASES); + registry.counter(MetricsConstants.CREATE_TOTAL_TABLES); + registry.counter(MetricsConstants.CREATE_TOTAL_PARTITIONS); + registry.counter(MetricsConstants.DELETE_TOTAL_DATABASES); + registry.counter(MetricsConstants.DELETE_TOTAL_TABLES); + registry.counter(MetricsConstants.DELETE_TOTAL_PARTITIONS); } - }); + } } preListeners = MetaStoreUtils.getMetaStoreListeners(MetaStorePreEventListener.class, @@ -534,8 +553,8 @@ public Object getValue() { listeners.add(new AcidEventListener(hiveConf)); transactionalListeners =
MetaStoreUtils.getMetaStoreListeners(TransactionalMetaStoreEventListener.class,hiveConf, hiveConf.getVar(ConfVars.METASTORE_TRANSACTIONAL_EVENT_LISTENERS)); - if (metrics != null) { - listeners.add(new HMSMetricsListener(hiveConf, metrics)); + if (registry != null) { + listeners.add(new HMSMetricsListener(hiveConf)); } endFunctionListeners = MetaStoreUtils.getMetaStoreListeners( @@ -851,9 +870,14 @@ private String startFunction(String function, String extraLogInfo) { incrementCounter(function); logInfo((getThreadLocalIpAddress() == null ? "" : "source:" + getThreadLocalIpAddress() + " ") + function + extraLogInfo); - if (MetricsFactory.getInstance() != null) { - MetricsFactory.getInstance().startStoredScope(MetricsConstant.API_PREFIX + function); - } + com.codahale.metrics.Timer timer = + Metrics.getOrCreateTimer(MetricsConstants.API_PREFIX + function); + if (timer != null) { + // Timer will be null if we aren't using the metrics + timerContexts.get().put(function, timer.time()); + } + Counter counter = Metrics.getOrCreateCounter(MetricsConstants.ACTIVE_CALLS + function); + if (counter != null) counter.inc(); return function; } @@ -890,9 +914,12 @@ private void endFunction(String function, boolean successful, Exception e, } private void endFunction(String function, MetaStoreEndFunctionContext context) { - if (MetricsFactory.getInstance() != null) { - MetricsFactory.getInstance().endStoredScope(MetricsConstant.API_PREFIX + function); + com.codahale.metrics.Timer.Context timerContext = timerContexts.get().remove(function); + if (timerContext != null) { + timerContext.close(); } + Counter counter = Metrics.getOrCreateCounter(MetricsConstants.ACTIVE_CALLS + function); + if (counter != null) counter.dec(); for (MetaStoreEndFunctionListener listener : endFunctionListeners) { listener.onEndFunction(function, context); @@ -7254,9 +7281,11 @@ public CacheFileMetadataResult cache_file_metadata( @VisibleForTesting public void updateMetrics() throws MetaException { - initTableCount = getMS().getTableCount(); - initPartCount = getMS().getPartitionCount(); - initDatabaseCount = getMS().getDatabaseCount(); + if (databaseCount != null) { + tableCount.set(getMS().getTableCount()); + partCount.set(getMS().getPartitionCount()); + databaseCount.set(getMS().getDatabaseCount()); + } } @Override @@ -7538,7 +7567,7 @@ public void run() { } if (conf.getBoolVar(ConfVars.METASTORE_METRICS)) { try { - MetricsFactory.close(); + Metrics.shutdown(); } catch (Exception e) { LOG.error("error in Metrics deinit: " + e.getClass().getName() + " " + e.getMessage(), e); @@ -7550,7 +7579,7 @@ public void run() { //Start Metrics for Standalone (Remote) Mode if (conf.getBoolVar(ConfVars.METASTORE_METRICS)) { try { - MetricsFactory.init(conf); + Metrics.initialize(conf); } catch (Exception e) { // log exception, but ignore inability to start LOG.error("error in Metrics init: " + e.getClass().getName() + " " @@ -7572,6 +7601,8 @@ public void run() { } } + private static final AtomicInteger openConnections = new AtomicInteger(); + /** * Start Metastore based on a passed {@link HadoopThriftAuthBridge} * @@ -7700,6 +7731,17 @@ public static void startMetaStore(int port, HadoopThriftAuthBridge bridge, serverSocket = new TServerSocketKeepAlive(serverSocket); } + // Metrics will have already been initialized if we're using them since HMSHandler + // initializes them.
+ MetricRegistry registry = Metrics.getRegistry(); + if (registry != null) { + registry.register(MetricsConstants.OPEN_CONNECTIONS, new Gauge<Integer>() { + @Override + public Integer getValue() { + return openConnections.get(); + } + }); + } TThreadPoolServer.Args args = new TThreadPoolServer.Args(serverSocket) .processor(processor) .transportFactory(transFactory) @@ -7716,27 +7758,13 @@ public void preServe() { @Override public ServerContext createContext(TProtocol tProtocol, TProtocol tProtocol1) { - try { - Metrics metrics = MetricsFactory.getInstance(); - if (metrics != null) { - metrics.incrementCounter(MetricsConstant.OPEN_CONNECTIONS); - } - } catch (Exception e) { - LOG.warn("Error Reporting Metastore open connection to Metrics system", e); - } + openConnections.incrementAndGet(); return null; } @Override public void deleteContext(ServerContext serverContext, TProtocol tProtocol, TProtocol tProtocol1) { - try { - Metrics metrics = MetricsFactory.getInstance(); - if (metrics != null) { - metrics.decrementCounter(MetricsConstant.OPEN_CONNECTIONS); - } - } catch (Exception e) { - LOG.warn("Error Reporting Metastore close connection to Metrics system", e); - } + openConnections.decrementAndGet(); // If the IMetaStoreClient#close was called, HMSHandler#shutdown would have already // cleaned up thread local RawStore. Otherwise, do it now. cleanupRawStore(); diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java b/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java index 0a80241b771..efbdb8f82d4 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java @@ -61,6 +61,8 @@ import javax.jdo.identity.IntIdentity; import javax.sql.DataSource; +import com.codahale.metrics.Counter; +import com.codahale.metrics.MetricRegistry; import org.apache.commons.lang.ArrayUtils; import org.apache.commons.lang.exception.ExceptionUtils; import org.apache.hadoop.conf.Configurable; @@ -71,9 +73,6 @@ import org.apache.hadoop.hive.common.StatsSetupConst; import org.apache.hadoop.hive.common.classification.InterfaceAudience; import org.apache.hadoop.hive.common.classification.InterfaceStability; -import org.apache.hadoop.hive.common.metrics.common.Metrics; -import org.apache.hadoop.hive.common.metrics.common.MetricsConstant; -import org.apache.hadoop.hive.common.metrics.common.MetricsFactory; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.metastore.MetaStoreDirectSql.SqlFilterForPushdown; @@ -126,6 +125,8 @@ import org.apache.hadoop.hive.metastore.conf.MetastoreConf; import org.apache.hadoop.hive.metastore.datasource.DataSourceProvider; import org.apache.hadoop.hive.metastore.datasource.DataSourceProviderFactory; +import org.apache.hadoop.hive.metastore.metrics.Metrics; +import org.apache.hadoop.hive.metastore.metrics.MetricsConstants; import org.apache.hadoop.hive.metastore.model.MColumnDescriptor; import org.apache.hadoop.hive.metastore.model.MConstraint; import org.apache.hadoop.hive.metastore.model.MDBPrivilege; @@ -245,6 +246,7 @@ private Transaction currentTransaction = null; private TXN_STATUS transactionStatus = TXN_STATUS.NO_STATE; private Pattern partitionValidationPattern; + private Counter directSqlErrors; /** * A Autocloseable wrapper around Query class to pass the Query object to the caller and let the caller release @@ -325,6 +327,13 @@ public void setConf(Configuration conf) {
partitionValidationPattern = null; } + // Note, if metrics have not been initialized this will return null, which means we aren't + // using metrics. Thus we should always check whether this is non-null before using. + MetricRegistry registry = Metrics.getRegistry(); + if (registry != null) { + directSqlErrors = Metrics.getOrCreateCounter(MetricsConstants.DIRECTSQL_ERRORS); + } + if (!isInitialized) { throw new RuntimeException( "Unable to create persistence manager. Check dss.log for details"); } @@ -2845,13 +2854,8 @@ private void handleDirectSqlError(Exception ex) throws MetaException, NoSuchObje start = doTrace ? System.nanoTime() : 0; } - Metrics metrics = MetricsFactory.getInstance(); - if (metrics != null) { - try { - metrics.incrementCounter(MetricsConstant.DIRECTSQL_ERRORS); - } catch (Exception e) { - LOG.warn("Error reporting Direct SQL errors to metrics system", e); - } + if (directSqlErrors != null) { + directSqlErrors.inc(); } doUseDirectSql = false; diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java b/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java index a6d56137b3c..a9f78b78cdc 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java @@ -17,6 +17,8 @@ */ package org.apache.hadoop.hive.metastore.txn; +import com.codahale.metrics.Gauge; +import com.codahale.metrics.MetricRegistry; import com.google.common.annotations.VisibleForTesting; import com.zaxxer.hikari.HikariConfig; import com.zaxxer.hikari.HikariDataSource; @@ -35,6 +37,8 @@ import org.apache.hadoop.hive.metastore.conf.MetastoreConf; import org.apache.hadoop.hive.metastore.datasource.BoneCPDataSourceProvider; import org.apache.hadoop.hive.metastore.datasource.DataSourceProvider; +import org.apache.hadoop.hive.metastore.metrics.Metrics; +import org.apache.hadoop.hive.metastore.metrics.MetricsConstants; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.commons.dbcp.PoolingDataSource; @@ -56,6 +60,7 @@ import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.Semaphore; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.locks.ReentrantLock; import java.util.regex.Pattern; @@ -191,7 +196,7 @@ public static OpertaionType fromDataOperationType(DataOperationType dop) { // Maximum number of open transactions that's allowed private static volatile int maxOpenTxns = 0; // Current number of open txns - private static volatile long numOpenTxns = 0; + private static final AtomicLong numOpenTxns = new AtomicLong(); // Whether number of open transactions reaches the threshold private static volatile boolean tooManyOpenTxns = false; // The AcidHouseKeeperService for counting open transactions @@ -275,6 +280,19 @@ public void setConf(HiveConf conf) { } finally { closeDbConn(dbConn); } + + // Set up a metric to track the open txns + MetricRegistry registry = Metrics.getRegistry(); + if (registry != null) { + if (!registry.getNames().contains(MetricsConstants.NUM_OPEN_TXNS)) { + registry.register(MetricsConstants.NUM_OPEN_TXNS, new Gauge<Long>() { + @Override + public Long getValue() { + return numOpenTxns.get(); + } + }); + } + } } } @@ -462,11 +480,11 @@ public OpenTxnsResponse openTxns(OpenTxnRequest rqst) throws MetaException { } } - if (!tooManyOpenTxns && numOpenTxns >= maxOpenTxns) { + if (!tooManyOpenTxns && numOpenTxns.get() >= maxOpenTxns) { tooManyOpenTxns = true; } if
(tooManyOpenTxns) { - if (numOpenTxns < maxOpenTxns * 0.9) { + if (numOpenTxns.get() < maxOpenTxns * 0.9) { tooManyOpenTxns = false; } else { LOG.warn("Maximum allowed number of open transactions (" + maxOpenTxns + ") has been " + @@ -3150,7 +3168,7 @@ public void countOpenTxns() throws MetaException { LOG.error("Transaction database not properly configured, " + "can't find txn_state from TXNS."); } else { - numOpenTxns = rs.getLong(1); + numOpenTxns.set(rs.getLong(1)); } } catch (SQLException e) { LOG.debug("Going to rollback"); diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/TestObjectStore.java b/metastore/src/test/org/apache/hadoop/hive/metastore/TestObjectStore.java index b28ea735935..08228af4d90 100644 --- a/metastore/src/test/org/apache/hadoop/hive/metastore/TestObjectStore.java +++ b/metastore/src/test/org/apache/hadoop/hive/metastore/TestObjectStore.java @@ -23,11 +23,7 @@ import java.util.List; import java.util.Set; -import org.apache.hadoop.hive.common.metrics.common.MetricsConstant; -import org.apache.hadoop.hive.common.metrics.common.MetricsFactory; -import org.apache.hadoop.hive.common.metrics.metrics2.CodahaleMetrics; -import org.apache.hadoop.hive.common.metrics.metrics2.MetricsReporting; -import org.apache.hadoop.hive.common.metrics.MetricsTestUtils; +import com.codahale.metrics.Counter; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.api.CurrentNotificationEventId; import org.apache.hadoop.hive.metastore.api.Database; @@ -51,6 +47,8 @@ import org.apache.hadoop.hive.metastore.api.StorageDescriptor; import org.apache.hadoop.hive.metastore.api.Table; import org.apache.hadoop.hive.metastore.messaging.EventMessage; +import org.apache.hadoop.hive.metastore.metrics.Metrics; +import org.apache.hadoop.hive.metastore.metrics.MetricsConstants; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; @@ -364,11 +362,12 @@ public void testRoleOps() throws InvalidObjectException, MetaException, NoSuchOb public void testDirectSqlErrorMetrics() throws Exception { HiveConf conf = new HiveConf(); conf.setBoolVar(HiveConf.ConfVars.HIVE_SERVER2_METRICS_ENABLED, true); - conf.setVar(HiveConf.ConfVars.HIVE_METRICS_REPORTER, MetricsReporting.JSON_FILE.name() - + "," + MetricsReporting.JMX.name()); + Metrics.initialize(conf); - MetricsFactory.init(conf); - CodahaleMetrics metrics = (CodahaleMetrics) MetricsFactory.getInstance(); + // re-run setUp so that we get an object store with the metrics initialized + setUp(); + Counter directSqlErrors = + Metrics.getRegistry().getCounters().get(MetricsConstants.DIRECTSQL_ERRORS); objectStore.new GetDbHelper("foo", null, true, true) { @Override @@ -383,9 +382,7 @@ protected Database getJdoResult(ObjectStore.GetHelper ctx) throws Meta } }.run(false); - String json = metrics.dumpJson(); - MetricsTestUtils.verifyMetricsJson(json, MetricsTestUtils.COUNTER, - MetricsConstant.DIRECTSQL_ERRORS, ""); + Assert.assertEquals(0, directSqlErrors.getCount()); objectStore.new GetDbHelper("foo", null, true, true) { @Override @@ -400,9 +397,7 @@ protected Database getJdoResult(ObjectStore.GetHelper ctx) throws Meta } }.run(false); - json = metrics.dumpJson(); - MetricsTestUtils.verifyMetricsJson(json, MetricsTestUtils.COUNTER, - MetricsConstant.DIRECTSQL_ERRORS, 1); + Assert.assertEquals(1, directSqlErrors.getCount()); } public static void dropAllStoreObjects(RawStore store) throws MetaException,
InvalidObjectException, InvalidInputException { diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ThreadPool.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ThreadPool.java new file mode 100644 index 00000000000..d0fcd25fbb2 --- /dev/null +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ThreadPool.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.metastore; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.metastore.conf.MetastoreConf; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; + +/** + * Utility singleton class to manage all the threads. + */ +public class ThreadPool { + + static final private Logger LOG = LoggerFactory.getLogger(ThreadPool.class); + private static ThreadPool self = null; + private static ScheduledExecutorService pool; + + public static synchronized ThreadPool initialize(Configuration conf) { + if (self == null) { + self = new ThreadPool(conf); + LOG.debug("ThreadPool initialized"); + } + return self; + } + + private ThreadPool(Configuration conf) { + pool = Executors.newScheduledThreadPool(MetastoreConf.getIntVar(conf, + MetastoreConf.ConfVars.THREAD_POOL_SIZE)); + } + + public static ScheduledExecutorService getPool() { + if (self == null) { + throw new RuntimeException("ThreadPool accessed before initialized"); + } + return pool; + } + + public static synchronized void shutdown() { + if (self != null) { + pool.shutdown(); + self = null; + } + } +} diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/metrics/JsonReporter.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/metrics/JsonReporter.java new file mode 100644 index 00000000000..b804cdade07 --- /dev/null +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/metrics/JsonReporter.java @@ -0,0 +1,166 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.metastore.metrics; + +import com.codahale.metrics.Counter; +import com.codahale.metrics.Gauge; +import com.codahale.metrics.Histogram; +import com.codahale.metrics.Meter; +import com.codahale.metrics.MetricFilter; +import com.codahale.metrics.MetricRegistry; +import com.codahale.metrics.ScheduledReporter; +import com.codahale.metrics.Timer; +import com.codahale.metrics.json.MetricsModule; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.ObjectWriter; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.hive.metastore.conf.MetastoreConf; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.BufferedWriter; +import java.io.IOException; +import java.io.OutputStreamWriter; +import java.net.URI; +import java.util.SortedMap; +import java.util.concurrent.TimeUnit; + +public class JsonReporter extends ScheduledReporter { + private static final Logger LOG = LoggerFactory.getLogger(JsonReporter.class); + + private final Configuration conf; + private final MetricRegistry registry; + private ObjectWriter jsonWriter; + private Path path; + private Path tmpPath; + private FileSystem fs; + + private JsonReporter(MetricRegistry registry, String name, MetricFilter filter, + TimeUnit rateUnit, TimeUnit durationUnit, Configuration conf) { + super(registry, name, filter, rateUnit, durationUnit); + this.conf = conf; + this.registry = registry; + } + + @Override + public void start(long period, TimeUnit unit) { + jsonWriter = new ObjectMapper().registerModule(new MetricsModule(TimeUnit.MILLISECONDS, + TimeUnit.MILLISECONDS, false)).writerWithDefaultPrettyPrinter(); + String pathString = MetastoreConf.getVar(conf, MetastoreConf.ConfVars .METRICS_JSON_FILE_LOCATION); + path = new Path(pathString); + + tmpPath = new Path(pathString + ".tmp"); + URI tmpPathURI = tmpPath.toUri(); + try { + if (tmpPathURI.getScheme() == null && tmpPathURI.getAuthority() == null) { + //default local + fs = FileSystem.getLocal(conf); + } else { + fs = FileSystem.get(tmpPathURI, conf); + } + } + catch (IOException e) { + LOG.error("Unable to access filesystem for path " + tmpPath + ". 
Aborting reporting", e); + return; + } + super.start(period, unit); + } + + @Override + public void report(SortedMap sortedMap, SortedMap sortedMap1, + SortedMap sortedMap2, SortedMap sortedMap3, + SortedMap sortedMap4) { + + String json; + try { + json = jsonWriter.writeValueAsString(registry); + } catch (JsonProcessingException e) { + LOG.error("Unable to convert json to string ", e); + return; + } + + BufferedWriter bw = null; + try { + fs.delete(tmpPath, true); + bw = new BufferedWriter(new OutputStreamWriter(fs.create(tmpPath, true))); + bw.write(json); + fs.setPermission(tmpPath, FsPermission.createImmutable((short) 0644)); + } catch (IOException e) { + LOG.error("Unable to write to temp file " + tmpPath, e); + return; + } finally { + if (bw != null) { + try { + bw.close(); + } catch (IOException e) { + // Not much we can do + LOG.error("Caught error closing json metric reporter file", e); + } + } + } + + try { + fs.rename(tmpPath, path); + fs.setPermission(path, FsPermission.createImmutable((short) 0644)); + } catch (IOException e) { + LOG.error("Unable to rename temp file " + tmpPath + " to " + path, e); + } + } + + public static Builder forRegistry(MetricRegistry registry, Configuration conf) { + return new Builder(registry, conf); + } + + public static class Builder { + private final MetricRegistry registry; + private final Configuration conf; + private TimeUnit rate = TimeUnit.SECONDS; + private TimeUnit duration = TimeUnit.MILLISECONDS; + private MetricFilter filter = MetricFilter.ALL; + + private Builder(MetricRegistry registry, Configuration conf) { + this.registry = registry; + this.conf = conf; + } + + public Builder convertRatesTo(TimeUnit rate) { + this.rate = rate; + return this; + } + + public Builder convertDurationsTo(TimeUnit duration) { + this.duration = duration; + return this; + } + + public Builder filter(MetricFilter filter) { + this.filter = filter; + return this; + } + + public JsonReporter build() { + return new JsonReporter(registry, "json-reporter", filter, rate, duration, conf); + } + + } + +} diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/metrics/Metrics.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/metrics/Metrics.java new file mode 100644 index 00000000000..190361b125a --- /dev/null +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/metrics/Metrics.java @@ -0,0 +1,210 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.metastore.metrics; + +import com.codahale.metrics.ConsoleReporter; +import com.codahale.metrics.Counter; +import com.codahale.metrics.JmxReporter; +import com.codahale.metrics.MetricRegistry; +import com.codahale.metrics.Reporter; +import com.codahale.metrics.ScheduledReporter; +import com.codahale.metrics.Timer; +import com.codahale.metrics.jvm.BufferPoolMetricSet; +import com.codahale.metrics.jvm.ClassLoadingGaugeSet; +import com.codahale.metrics.jvm.GarbageCollectorMetricSet; +import com.codahale.metrics.jvm.MemoryUsageGaugeSet; +import com.codahale.metrics.jvm.ThreadStatesGaugeSet; +import com.github.joshelser.dropwizard.metrics.hadoop.HadoopMetrics2Reporter; +import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.metastore.ThreadPool; +import org.apache.hadoop.hive.metastore.conf.MetastoreConf; +import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.lang.management.ManagementFactory; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.concurrent.TimeUnit; + +public class Metrics { + private static final Logger LOGGER = LoggerFactory.getLogger(Metrics.class); + + private static Metrics self; + + private final MetricRegistry registry; + private List<Reporter> reporters; + private List<ScheduledReporter> scheduledReporters; + private boolean hadoopMetricsStarted; + + public static synchronized Metrics initialize(Configuration conf) { + if (self == null) { + self = new Metrics(conf); + } + return self; + } + + public static MetricRegistry getRegistry() { + if (self == null) return null; + return self.registry; + } + + public static void shutdown() { + if (self != null) { + for (ScheduledReporter reporter : self.scheduledReporters) { + reporter.stop(); + reporter.close(); + } + if (self.hadoopMetricsStarted) DefaultMetricsSystem.shutdown(); + self = null; + } + } + + /** + * Get an existing counter or create a new one if the requested one does not yet exist. Creation + * is synchronized to assure that only one instance of the counter is created. + * @param name name of the counter + * @return new Counter, or existing one if it already exists. This will return null if the + * metrics have not been initialized. + */ + public static Counter getOrCreateCounter(String name) { + if (self == null) return null; + Map<String, Counter> counters = self.registry.getCounters(); + Counter counter = counters.get(name); + if (counter != null) return counter; + // Looks like it doesn't exist. Lock so that two threads don't create it at once. + synchronized (Metrics.class) { + // Recheck to make sure someone didn't create it while we waited. + counters = self.registry.getCounters(); + counter = counters.get(name); + if (counter != null) return counter; + return self.registry.counter(name); + } + } + + /** + * Get an existing timer or create a new one if the requested one does not yet exist. Creation + * is synchronized to assure that only one instance of the timer is created.
+ * @param name timer name + * @return new Timer, or existing one if it already exists, null if the metrics have not been + * initialized. + */ + public static Timer getOrCreateTimer(String name) { + if (self == null) return null; + Map<String, Timer> timers = self.registry.getTimers(); + Timer timer = timers.get(name); + if (timer != null) return timer; + synchronized (Metrics.class) { + timers = self.registry.getTimers(); + timer = timers.get(name); + if (timer != null) return timer; + return self.registry.timer(name); + } + } + + @VisibleForTesting + static List<Reporter> getReporters() { + return self.reporters; + } + + private Metrics(Configuration conf) { + registry = new MetricRegistry(); + + registry.registerAll(new GarbageCollectorMetricSet()); + registry.registerAll(new BufferPoolMetricSet(ManagementFactory.getPlatformMBeanServer())); + registry.registerAll(new MemoryUsageGaugeSet()); + registry.registerAll(new ThreadStatesGaugeSet()); + registry.registerAll(new ClassLoadingGaugeSet()); + + /* + * This is a little complicated. First we look for our own config values on this. If those + * aren't set we use the Hive ones. But Hive also has multiple ways to do this, so we need to + * look in both of theirs as well. We can't use theirs directly because they wrap the + * codahale reporters in their own and we do not. + */ + // Check our config value first. I'm explicitly avoiding getting the default value for now, + // as I don't want our default to override a Hive set value. + String reportersToStart = conf.get(MetastoreConf.ConfVars.METRICS_REPORTERS.varname); + if (reportersToStart == null) { + // Now look in the current Hive config value. Again, avoiding getting defaults + reportersToStart = + conf.get(MetastoreConf.ConfVars.HIVE_CODAHALE_METRICS_REPORTER_CLASSES.hiveName); + if (reportersToStart == null) { + // Last chance, look in the old Hive config value. Still avoiding defaults.
+ reportersToStart = + conf.get(MetastoreConf.ConfVars.HIVE_METRICS_REPORTER.hiveName); + if (reportersToStart == null) { + // Alright fine, we'll use our defaults + reportersToStart = MetastoreConf.getVar(conf, MetastoreConf.ConfVars.METRICS_REPORTERS); + } + } + } + + reporters = new ArrayList<>(); + scheduledReporters = new ArrayList<>(); + if (reportersToStart != null && reportersToStart.length() > 0) { + String[] reporterNames = reportersToStart.toLowerCase().split(","); + for (String reporterName : reporterNames) { + if (reporterName.equals("console") || reporterName.endsWith("consolemetricsreporter")) { + ConsoleReporter reporter = ConsoleReporter.forRegistry(registry) + .convertRatesTo(TimeUnit.SECONDS) + .convertDurationsTo(TimeUnit.MILLISECONDS) + .build(); + reporter.start(15, TimeUnit.SECONDS); + reporters.add(reporter); + scheduledReporters.add(reporter); + } else if (reporterName.equals("jmx") || reporterName.endsWith("jmxmetricsreporter")) { + JmxReporter reporter = JmxReporter.forRegistry(registry) + .convertRatesTo(TimeUnit.SECONDS) + .convertDurationsTo(TimeUnit.MILLISECONDS) + .build(); + reporter.start(); + reporters.add(reporter); + } else if (reporterName.startsWith("json") || reporterName.endsWith("jsonfilemetricsreporter")) { + // We have to initialize the thread pool before we start this one, as it uses it + ThreadPool.initialize(conf); + JsonReporter reporter = JsonReporter.forRegistry(registry, conf) + .convertRatesTo(TimeUnit.SECONDS) + .convertDurationsTo(TimeUnit.MILLISECONDS) + .build(); + reporter.start(MetastoreConf.getTimeVar(conf, + MetastoreConf.ConfVars.METRICS_JSON_FILE_INTERVAL, TimeUnit.SECONDS), TimeUnit.SECONDS); + reporters.add(reporter); + scheduledReporters.add(reporter); + } else if (reporterName.startsWith("hadoop") || reporterName.endsWith("metrics2reporter")) { + HadoopMetrics2Reporter reporter = HadoopMetrics2Reporter.forRegistry(registry) + .convertRatesTo(TimeUnit.SECONDS) + .convertDurationsTo(TimeUnit.MILLISECONDS) + .build(DefaultMetricsSystem.initialize("metastore"), "metastore", "Runtime metadata" + + " catalog", "general-metadata"); + reporter.start(1, TimeUnit.MINUTES); + reporters.add(reporter); + scheduledReporters.add(reporter); + hadoopMetricsStarted = true; + } else { + throw new RuntimeException("Unknown metric type " + reporterName); + } + } + } else { + LOGGER.warn("No metrics reporters configured."); + } + + } +} diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/metrics/MetricsConstants.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/metrics/MetricsConstants.java new file mode 100644 index 00000000000..3b188f83afa --- /dev/null +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/metrics/MetricsConstants.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.metastore.metrics; + +public class MetricsConstants { + public static final String ACTIVE_CALLS = "active_calls_"; + public static final String API_PREFIX = "api_"; + + public static final String CREATE_TOTAL_DATABASES = "create_total_count_dbs"; + public static final String CREATE_TOTAL_TABLES = "create_total_count_tables"; + public static final String CREATE_TOTAL_PARTITIONS = "create_total_count_partitions"; + + public static final String DELETE_TOTAL_DATABASES = "delete_total_count_dbs"; + public static final String DELETE_TOTAL_TABLES = "delete_total_count_tables"; + public static final String DELETE_TOTAL_PARTITIONS = "delete_total_count_partitions"; + + public static final String DIRECTSQL_ERRORS = "directsql_errors"; + + public static final String JVM_PAUSE_INFO = "jvm.pause.info-threshold"; + public static final String JVM_PAUSE_WARN = "jvm.pause.warn-threshold"; + public static final String JVM_EXTRA_SLEEP = "jvm.pause.extraSleepTime"; + + public static final String NUM_OPEN_TXNS = "num_open_transactions"; + public static final String NUM_TIMED_OUT_TXNS = "num_timed_out_transactions"; + + public static final String OPEN_CONNECTIONS = "open_connections"; + + public static final String TOTAL_DATABASES = "total_count_dbs"; + public static final String TOTAL_TABLES = "total_count_tables"; + public static final String TOTAL_PARTITIONS = "total_count_partitions"; +} diff --git a/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/metrics/TestMetrics.java b/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/metrics/TestMetrics.java new file mode 100644 index 00000000000..ab84f643b7a --- /dev/null +++ b/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/metrics/TestMetrics.java @@ -0,0 +1,204 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.metastore.metrics; + +import com.codahale.metrics.Counter; +import com.codahale.metrics.Gauge; +import com.codahale.metrics.Histogram; +import com.codahale.metrics.Meter; +import com.codahale.metrics.MetricRegistry; +import com.codahale.metrics.Timer; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.metastore.conf.MetastoreConf; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import java.io.File; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.Arrays; +import java.util.List; +import java.util.concurrent.Callable; +import java.util.concurrent.TimeUnit; + +public class TestMetrics { + + @Test + public void jsonReporter() throws Exception { + String jsonFile = System.getProperty("java.io.tmpdir") + System.getProperty("file.separator") + + "TestMetricsOutput.json"; + Configuration conf = MetastoreConf.newMetastoreConf(); + MetastoreConf.setVar(conf, MetastoreConf.ConfVars.METRICS_REPORTERS, "json"); + MetastoreConf.setVar(conf, MetastoreConf.ConfVars.METRICS_JSON_FILE_LOCATION, jsonFile); + MetastoreConf.setTimeVar(conf, MetastoreConf.ConfVars.METRICS_JSON_FILE_INTERVAL, 1, + TimeUnit.SECONDS); + + Metrics.initialize(conf); + + final List<String> words = Arrays.asList("mary", "had", "a", "little", "lamb"); + MetricRegistry registry = Metrics.getRegistry(); + registry.register("my-gauge", new Gauge<Integer>() { + + @Override + public Integer getValue() { + return words.size(); + } + }); + + Counter counter = Metrics.getOrCreateCounter("my-counter"); + counter.inc(); + counter.inc(); + + Histogram hist = registry.histogram("my-histogram"); + hist.update(5); + hist.update(17); + + Meter meter = registry.meter("my-meter"); + meter.mark(); + Thread.sleep(10); + meter.mark(); + + Timer timer = Metrics.getOrCreateTimer("my-timer"); + timer.time(new Callable<Long>() { + @Override + public Long call() throws Exception { + Thread.sleep(100); + return 1L; + } + }); + + // Make sure it has a chance to dump it.
+    Thread.sleep(2000);
+
+    FileSystem fs = FileSystem.get(conf);
+    Path path = new Path(jsonFile);
+    Assert.assertTrue(fs.exists(path));
+
+    String json = new String(MetricsTestUtils.getFileData(jsonFile, 200, 10));
+    MetricsTestUtils.verifyMetricsJson(json, MetricsTestUtils.COUNTER, "my-counter", 2);
+    MetricsTestUtils.verifyMetricsJson(json, MetricsTestUtils.METER, "my-meter", 2);
+    MetricsTestUtils.verifyMetricsJson(json, MetricsTestUtils.TIMER, "my-timer", 1);
+    MetricsTestUtils.verifyMetricsJson(json, MetricsTestUtils.GAUGE, "my-gauge", 5);
+  }
+
+  @Test
+  public void allReporters() throws Exception {
+    String jsonFile = System.getProperty("java.io.tmpdir") + System.getProperty("file.separator") +
+        "TestMetricsOutput.json";
+    Configuration conf = MetastoreConf.newMetastoreConf();
+    MetastoreConf.setVar(conf, MetastoreConf.ConfVars.METRICS_REPORTERS, "json,jmx,console,hadoop");
+    MetastoreConf.setVar(conf, MetastoreConf.ConfVars.METRICS_JSON_FILE_LOCATION, jsonFile);
+
+    Metrics.initialize(conf);
+
+    Assert.assertEquals(4, Metrics.getReporters().size());
+  }
+
+  @Test
+  public void allReportersHiveConfig() throws Exception {
+    String jsonFile = System.getProperty("java.io.tmpdir") + System.getProperty("file.separator") +
+        "TestMetricsOutput.json";
+    Configuration conf = MetastoreConf.newMetastoreConf();
+    conf.set(MetastoreConf.ConfVars.HIVE_CODAHALE_METRICS_REPORTER_CLASSES.hiveName,
+        "org.apache.hadoop.hive.common.metrics.metrics2.JsonFileMetricsReporter," +
+        "org.apache.hadoop.hive.common.metrics.metrics2.JmxMetricsReporter," +
+        "org.apache.hadoop.hive.common.metrics.metrics2.ConsoleMetricsReporter," +
+        "org.apache.hadoop.hive.common.metrics.metrics2.Metrics2Reporter");
+    MetastoreConf.setVar(conf, MetastoreConf.ConfVars.METRICS_JSON_FILE_LOCATION, jsonFile);
+
+    Metrics.initialize(conf);
+
+    Assert.assertEquals(4, Metrics.getReporters().size());
+  }
+
+  @Test
+  public void allReportersOldHiveConfig() throws Exception {
+    String jsonFile = System.getProperty("java.io.tmpdir") + System.getProperty("file.separator") +
+        "TestMetricsOutput.json";
+    Configuration conf = MetastoreConf.newMetastoreConf();
+    conf.set(MetastoreConf.ConfVars.HIVE_METRICS_REPORTER.hiveName,
+        "JSON_FILE,JMX,CONSOLE,HADOOP2");
+    MetastoreConf.setVar(conf, MetastoreConf.ConfVars.METRICS_JSON_FILE_LOCATION, jsonFile);
+
+    Metrics.initialize(conf);
+
+    Assert.assertEquals(4, Metrics.getReporters().size());
+  }
+
+  @Test
+  public void defaults() throws Exception {
+    String jsonFile = System.getProperty("java.io.tmpdir") + System.getProperty("file.separator") +
+        "TestMetricsOutput.json";
+    Configuration conf = MetastoreConf.newMetastoreConf();
+    MetastoreConf.setVar(conf, MetastoreConf.ConfVars.METRICS_JSON_FILE_LOCATION, jsonFile);
+    Metrics.initialize(conf);
+
+    Assert.assertEquals(2, Metrics.getReporters().size());
+  }
+
+  @Before
+  public void shutdownMetrics() {
+    Metrics.shutdown();
+  }
+
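[For reference: the helpers below walk the standard Codahale JSON layout, roughly as sketched here. Shape assumed from the metrics-json module; values are illustrative and per-metric fields depend on the library version.]

    {
      "gauges"   : { "my-gauge"   : { "value" : 5 } },
      "counters" : { "my-counter" : { "count" : 2 } },
      "meters"   : { "my-meter"   : { "count" : 2, "mean_rate" : 1.9 } },
      "timers"   : { "my-timer"   : { "count" : 1, "max" : 101.2 } }
    }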
+  // Stolen from Hive's MetricsTestUtils. Probably should break it out into its own class.
+  private static class MetricsTestUtils {
+
+    static final MetricsCategory COUNTER = new MetricsCategory("counters", "count");
+    static final MetricsCategory TIMER = new MetricsCategory("timers", "count");
+    static final MetricsCategory GAUGE = new MetricsCategory("gauges", "value");
+    static final MetricsCategory METER = new MetricsCategory("meters", "count");
+
+    static class MetricsCategory {
+      String category;
+      String metricsHandle;
+      MetricsCategory(String category, String metricsHandle) {
+        this.category = category;
+        this.metricsHandle = metricsHandle;
+      }
+    }
+
+    static void verifyMetricsJson(String json, MetricsCategory category, String metricsName,
+        Object expectedValue) throws Exception {
+      JsonNode jsonNode = getJsonNode(json, category, metricsName);
+      Assert.assertEquals(expectedValue.toString(), jsonNode.asText());
+    }
+
+    static JsonNode getJsonNode(String json, MetricsCategory category, String metricsName) throws Exception {
+      ObjectMapper objectMapper = new ObjectMapper();
+      JsonNode rootNode = objectMapper.readTree(json);
+      JsonNode categoryNode = rootNode.path(category.category);
+      JsonNode metricsNode = categoryNode.path(metricsName);
+      return metricsNode.path(category.metricsHandle);
+    }
+
+    static byte[] getFileData(String path, int timeoutInterval, int tries) throws Exception {
+      File file = new File(path);
+      do {
+        Thread.sleep(timeoutInterval);
+        tries--;
+      } while (tries > 0 && !file.exists());
+      return Files.readAllBytes(Paths.get(path));
+    }
+  }
+}

From 1254a7d0753d940089a500fb09b29df05cf728f5 Mon Sep 17 00:00:00 2001
From: Alan Gates
Date: Tue, 1 Aug 2017 09:58:52 -0700
Subject: [PATCH 2/4] Adding pom file I missed in the previous checkin.

---
 standalone-metastore/pom.xml | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/standalone-metastore/pom.xml b/standalone-metastore/pom.xml
index 2e0c51e0a4d..e38ef3770b2 100644
--- a/standalone-metastore/pom.xml
+++ b/standalone-metastore/pom.xml
@@ -33,11 +33,31 @@
+    <dependency>
+      <groupId>com.github.joshelser</groupId>
+      <artifactId>dropwizard-metrics-hadoop-metrics2-reporter</artifactId>
+      <version>${dropwizard-metrics-hadoop-metrics2-reporter.version}</version>
+    </dependency>
     <dependency>
       <groupId>com.google.guava</groupId>
       <artifactId>guava</artifactId>
       <version>${guava.version}</version>
     </dependency>
+    <dependency>
+      <groupId>io.dropwizard.metrics</groupId>
+      <artifactId>metrics-core</artifactId>
+      <version>${dropwizard.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>io.dropwizard.metrics</groupId>
+      <artifactId>metrics-jvm</artifactId>
+      <version>${dropwizard.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>io.dropwizard.metrics</groupId>
+      <artifactId>metrics-json</artifactId>
+      <version>${dropwizard.version}</version>
+    </dependency>
     <dependency>
       <groupId>org.apache.hadoop</groupId>
       <artifactId>hadoop-common</artifactId>
       <version>${hadoop.version}</version>
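[These dependencies back the reporter types exercised in TestMetrics above. A minimal sketch of enabling a subset of them, using the same ConfVars the tests use; the file path is illustrative, not part of the patch.]

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
    import org.apache.hadoop.hive.metastore.metrics.Metrics;

    public class ReporterConfigSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = MetastoreConf.newMetastoreConf();
        // "json" and "jmx" map to the metrics-json module and a JMX reporter;
        // "hadoop" pulls in the dropwizard-metrics-hadoop-metrics2-reporter bridge.
        MetastoreConf.setVar(conf, MetastoreConf.ConfVars.METRICS_REPORTERS, "json,jmx");
        MetastoreConf.setVar(conf, MetastoreConf.ConfVars.METRICS_JSON_FILE_LOCATION,
            "/tmp/metastore-metrics.json");  // illustrative path
        Metrics.initialize(conf);
        // ... serve requests; reporters publish until shutdown ...
        Metrics.shutdown();
      }
    }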
From f2749ab4ea6d9d883e884f8f87e151576fae4320 Mon Sep 17 00:00:00 2001
From: Alan Gates
Date: Tue, 1 Aug 2017 11:36:36 -0700
Subject: [PATCH 3/4] Copied JvmPauseMonitor into standalone-metastore

---
 .../hadoop/hive/metastore/HiveMetaStore.java       |   5 +-
 .../hive/metastore/metrics/JvmPauseMonitor.java    | 222 +++++++++++++++++++++
 2 files changed, 224 insertions(+), 3 deletions(-)
 create mode 100644 standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/metrics/JvmPauseMonitor.java

diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
index 0a406838bf1..40e0b2804c4 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
@@ -1,5 +1,4 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
+/** * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
@@ -73,7 +72,6 @@
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.common.FileUtils;
-import org.apache.hadoop.hive.common.JvmPauseMonitor;
 import org.apache.hadoop.hive.common.LogUtils;
 import org.apache.hadoop.hive.common.LogUtils.LogInitializationException;
 import org.apache.hadoop.hive.common.StatsSetupConst;
@@ -127,6 +125,7 @@
 import org.apache.hadoop.hive.metastore.events.PreReadTableEvent;
 import org.apache.hadoop.hive.metastore.filemeta.OrcFileMetadataHandler;
 import org.apache.hadoop.hive.metastore.messaging.EventMessage.EventType;
+import org.apache.hadoop.hive.metastore.metrics.JvmPauseMonitor;
 import org.apache.hadoop.hive.metastore.metrics.Metrics;
 import org.apache.hadoop.hive.metastore.metrics.MetricsConstants;
 import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy;

diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/metrics/JvmPauseMonitor.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/metrics/JvmPauseMonitor.java
new file mode 100644
index 00000000000..93414fce52d
--- /dev/null
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/metrics/JvmPauseMonitor.java
@@ -0,0 +1,222 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.metastore.metrics;
+
+import com.codahale.metrics.Counter;
+import com.google.common.base.Joiner;
+import com.google.common.base.Preconditions;
+import com.google.common.base.Stopwatch;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import com.google.common.collect.Sets;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.util.Daemon;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.lang.management.GarbageCollectorMXBean;
+import java.lang.management.ManagementFactory;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.TimeUnit;
+
+/**
+ * Based on the JvmPauseMonitor from Hadoop.
+ */
+public class JvmPauseMonitor {
+  private static final Logger LOG = LoggerFactory.getLogger(JvmPauseMonitor.class);
+
+  /** The target sleep time. */
+  private static final long SLEEP_INTERVAL_MS = 500;
+
+  /** Log WARN if we detect a pause longer than this threshold. */
+  private final long warnThresholdMs;
+  private static final String WARN_THRESHOLD_KEY = "jvm.pause.warn-threshold.ms";
+  private static final long WARN_THRESHOLD_DEFAULT = 10000;
+
+  /** Log INFO if we detect a pause longer than this threshold. */
+  private final long infoThresholdMs;
+  private static final String INFO_THRESHOLD_KEY = "jvm.pause.info-threshold.ms";
+  private static final long INFO_THRESHOLD_DEFAULT = 1000;
+
+  private long numGcWarnThresholdExceeded = 0;
+  private long numGcInfoThresholdExceeded = 0;
+  private long totalGcExtraSleepTime = 0;
+
+  private Thread monitorThread;
+  private volatile boolean shouldRun = true;
+
+  public JvmPauseMonitor(Configuration conf) {
+    this.warnThresholdMs = conf.getLong(WARN_THRESHOLD_KEY, WARN_THRESHOLD_DEFAULT);
+    this.infoThresholdMs = conf.getLong(INFO_THRESHOLD_KEY, INFO_THRESHOLD_DEFAULT);
+  }
+
+  public void start() {
+    Preconditions.checkState(monitorThread == null,
+        "JvmPauseMonitor thread is already started");
+    monitorThread = new Daemon(new Monitor());
+    monitorThread.start();
+  }
+
+  public void stop() {
+    shouldRun = false;
+    if (isStarted()) {
+      monitorThread.interrupt();
+      try {
+        monitorThread.join();
+      } catch (InterruptedException e) {
+        Thread.currentThread().interrupt();
+      }
+    }
+  }
+
+  public boolean isStarted() {
+    return monitorThread != null;
+  }
+
+  public long getNumGcWarnThresholdExceeded() {
+    return numGcWarnThresholdExceeded;
+  }
+
+  public long getNumGcInfoThresholdExceeded() {
+    return numGcInfoThresholdExceeded;
+  }
+
+  public long getTotalGcExtraSleepTime() {
+    return totalGcExtraSleepTime;
+  }
+
+  private String formatMessage(long extraSleepTime,
+      Map<String, GcTimes> gcTimesAfterSleep,
+      Map<String, GcTimes> gcTimesBeforeSleep) {
+
+    Set<String> gcBeanNames = Sets.intersection(
+        gcTimesAfterSleep.keySet(),
+        gcTimesBeforeSleep.keySet());
+    List<String> gcDiffs = Lists.newArrayList();
+    for (String name : gcBeanNames) {
+      GcTimes diff = gcTimesAfterSleep.get(name).subtract(gcTimesBeforeSleep.get(name));
+      if (diff.gcCount != 0) {
+        gcDiffs.add("GC pool '" + name + "' had collection(s): " + diff.toString());
+      }
+    }
+
+    String ret = "Detected pause in JVM or host machine (eg GC): " +
+        "pause of approximately " + extraSleepTime + "ms\n";
+    if (gcDiffs.isEmpty()) {
+      ret += "No GCs detected";
+    } else {
+      ret += Joiner.on("\n").join(gcDiffs);
+    }
+    return ret;
+  }
+
+  private Map<String, GcTimes> getGcTimes() {
+    Map<String, GcTimes> map = Maps.newHashMap();
+    List<GarbageCollectorMXBean> gcBeans = ManagementFactory.getGarbageCollectorMXBeans();
+    for (GarbageCollectorMXBean gcBean : gcBeans) {
+      map.put(gcBean.getName(), new GcTimes(gcBean));
+    }
+    return map;
+  }
+
+  private static class GcTimes {
+    private GcTimes(GarbageCollectorMXBean gcBean) {
+      gcCount = gcBean.getCollectionCount();
+      gcTimeMillis = gcBean.getCollectionTime();
+    }
+
+    private GcTimes(long count, long time) {
+      this.gcCount = count;
+      this.gcTimeMillis = time;
+    }
+
+    private GcTimes subtract(GcTimes other) {
+      return new GcTimes(this.gcCount - other.gcCount, this.gcTimeMillis - other.gcTimeMillis);
+    }
+
+    @Override
+    public String toString() {
+      return "count=" + gcCount + " time=" + gcTimeMillis + "ms";
+    }
+
+    private final long gcCount;
+    private final long gcTimeMillis;
+  }
+
+  private class Monitor implements Runnable {
+    @Override
+    public void run() {
+      Stopwatch sw = new Stopwatch();
+      Map<String, GcTimes> gcTimesBeforeSleep = getGcTimes();
+      Counter jvmPauseWarnCnt = Metrics.getOrCreateCounter(MetricsConstants.JVM_PAUSE_WARN);
+      Counter jvmPauseInfoCnt = Metrics.getOrCreateCounter(MetricsConstants.JVM_PAUSE_INFO);
+      Counter jvmExtraSleepCnt = Metrics.getOrCreateCounter(MetricsConstants.JVM_EXTRA_SLEEP);
+      while (shouldRun) {
+        sw.reset().start();
+        try {
+          Thread.sleep(SLEEP_INTERVAL_MS);
+        } catch (InterruptedException ie) {
+          return;
+        }
+        long extraSleepTime = sw.elapsed(TimeUnit.MILLISECONDS) - SLEEP_INTERVAL_MS;
+        Map<String, GcTimes> gcTimesAfterSleep = getGcTimes();
+
+        if (extraSleepTime > warnThresholdMs) {
+          ++numGcWarnThresholdExceeded;
+          LOG.warn(formatMessage(extraSleepTime, gcTimesAfterSleep, gcTimesBeforeSleep));
+          if (jvmPauseWarnCnt != null) jvmPauseWarnCnt.inc();
+        } else if (extraSleepTime > infoThresholdMs) {
+          ++numGcInfoThresholdExceeded;
+          LOG.info(formatMessage(extraSleepTime, gcTimesAfterSleep, gcTimesBeforeSleep));
+          if (jvmPauseInfoCnt != null) jvmPauseInfoCnt.inc();
+        }
+        if (jvmExtraSleepCnt != null) jvmExtraSleepCnt.inc(extraSleepTime);
+        totalGcExtraSleepTime += extraSleepTime;
+        gcTimesBeforeSleep = gcTimesAfterSleep;
+      }
+    }
+  }
+
+  /**
+   * Simple 'main' to facilitate manual testing of the pause monitor.
+   *
+   * This main function just leaks memory into a list. Running this class
+   * with a 1GB heap will very quickly go into "GC hell" and result in
+   * log messages about the GC pauses.
+   */
+  public static void main(String[] args) throws Exception {
+    new JvmPauseMonitor(new Configuration()).start();
+    List<String> list = Lists.newArrayList();
+    int i = 0;
+    while (true) {
+      list.add(String.valueOf(i++));
+    }
+  }
+}
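[The HiveMetaStore hunk above only swaps the import; the startup wiring is not shown in this patch. A hedged sketch of the typical start/stop pattern, using only the public API of this class:]

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hive.metastore.metrics.JvmPauseMonitor;

    public class PauseMonitorSketch {
      public static void main(String[] args) throws Exception {
        JvmPauseMonitor pauseMonitor = new JvmPauseMonitor(new Configuration());
        pauseMonitor.start();   // daemon thread; wakes every 500ms and measures oversleep
        Thread.sleep(5000);     // stand-in for the metastore serving requests
        pauseMonitor.stop();    // interrupts and joins the monitor thread
        System.out.println("Extra sleep observed: " +
            pauseMonitor.getTotalGcExtraSleepTime() + "ms");
      }
    }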
From 30de03b282d9ec79c593e858559b59232342faa8 Mon Sep 17 00:00:00 2001
From: Alan Gates
Date: Tue, 1 Aug 2017 13:43:14 -0700
Subject: [PATCH 4/4] Copied PerfLogger into the standalone metastore and moved RetryingHMSHandler to use it.

---
 .../hive/metastore/TestMetaStoreMetrics.java       |   2 +-
 .../hadoop/hive/metastore/RetryingHMSHandler.java  |   4 +-
 .../hadoop/hive/metastore/metrics/PerfLogger.java  | 184 +++++++++++++++++++++
 3 files changed, 187 insertions(+), 3 deletions(-)
 create mode 100644 standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/metrics/PerfLogger.java

diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestMetaStoreMetrics.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestMetaStoreMetrics.java
index 0e3da3fd9cc..e1cb1f309a3 100644
--- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestMetaStoreMetrics.java
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestMetaStoreMetrics.java
@@ -65,7 +65,7 @@ public void testMethodCounts() throws Exception {
     driver.run("show databases");
     //one call by init, one called here.
-    Assert.assertEquals(1, Metrics.getRegistry().getTimers().get("api_get_all_databases").getCount());
+    Assert.assertEquals(2, Metrics.getRegistry().getTimers().get("api_get_all_databases").getCount());
   }

   @Test

diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/RetryingHMSHandler.java b/metastore/src/java/org/apache/hadoop/hive/metastore/RetryingHMSHandler.java
index a08c5bd2bd6..affb38f882f 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/RetryingHMSHandler.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/RetryingHMSHandler.java
@@ -26,6 +26,7 @@
 import java.util.concurrent.TimeUnit;

 import org.apache.commons.lang.exception.ExceptionUtils;
+import org.apache.hadoop.hive.metastore.metrics.PerfLogger;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -34,7 +35,6 @@
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.api.MetaException;
 import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
-import org.apache.hadoop.hive.ql.log.PerfLogger;
 import org.datanucleus.exceptions.NucleusException;

 @InterfaceAudience.Private
@@ -101,7 +101,7 @@ public Object invoke(final Object proxy, final Method method, final Object[] arg
     int retryCount = -1;
     int threadId = HiveMetaStore.HMSHandler.get();
     boolean error = true;
-    PerfLogger perfLogger = PerfLogger.getPerfLogger(origConf, false);
+    PerfLogger perfLogger = PerfLogger.getPerfLogger(false);
     perfLogger.PerfLogBegin(CLASS_NAME, method.getName());
     try {
       Result result = invokeInternal(proxy, method, args);

diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/metrics/PerfLogger.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/metrics/PerfLogger.java
new file mode 100644
index 00000000000..dfc4308a1fd
--- /dev/null
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/metrics/PerfLogger.java
@@ -0,0 +1,184 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.metastore.metrics;
+
+import com.codahale.metrics.Timer;
+import com.google.common.collect.ImmutableMap;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * PerfLogger.
+ *
+ * Can be used to measure and log the time spent by a piece of code.
+ */
+public class PerfLogger {
+  protected final Map<String, Long> startTimes = new HashMap<>();
+  protected final Map<String, Long> endTimes = new HashMap<>();
+
+  private static final Logger LOG = LoggerFactory.getLogger(PerfLogger.class.getName());
+  protected static final ThreadLocal<PerfLogger> perfLogger = new ThreadLocal<>();
+
+  private PerfLogger() {
+    // Use getPerfLogger to get an instance of PerfLogger
+  }
+
+  public static PerfLogger getPerfLogger(boolean resetPerfLogger) {
+    PerfLogger result = perfLogger.get();
+    if (resetPerfLogger || result == null) {
+      result = new PerfLogger();
+      perfLogger.set(result);
+    }
+    return result;
+  }
+
+  public static void setPerfLogger(PerfLogger resetPerfLogger) {
+    perfLogger.set(resetPerfLogger);
+  }
+
+  /**
+   * Call this function when you start to measure time spent by a piece of code.
+   * @param callerName the logging object to be used.
+   * @param method method or ID that identifies this perf log element.
+   */
+  public void PerfLogBegin(String callerName, String method) {
+    long startTime = System.currentTimeMillis();
+    startTimes.put(method, startTime);
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("<PERFLOG method=" + method + " from=" + callerName + ">");
+    }
+    beginMetrics(method);
+  }
+
+  /**
+   * Call this function in correspondence of PerfLogBegin to mark the end of the measurement.
+   * @param callerName the logging object to be used.
+   * @param method method or ID that identifies this perf log element.
+   * @return long duration  the difference between now and startTime, or -1 if startTime is null
+   */
+  public long PerfLogEnd(String callerName, String method) {
+    return PerfLogEnd(callerName, method, null);
+  }
+
+  /**
+   * Call this function in correspondence of PerfLogBegin to mark the end of the measurement.
+   * @param callerName the logging object to be used.
+   * @param method method or ID that identifies this perf log element.
+   * @param additionalInfo extra text to append to the log line; may be null.
+   * @return long duration  the difference between now and startTime, or -1 if startTime is null
+   */
+  public long PerfLogEnd(String callerName, String method, String additionalInfo) {
+    Long startTime = startTimes.get(method);
+    long endTime = System.currentTimeMillis();
+    endTimes.put(method, endTime);
+    long duration = startTime == null ? -1 : endTime - startTime;
+
+    if (LOG.isDebugEnabled()) {
+      StringBuilder sb = new StringBuilder("</PERFLOG method=").append(method);
+      if (startTime != null) {
+        sb.append(" start=").append(startTime);
+      }
+      sb.append(" end=").append(endTime);
+      sb.append(" duration=").append(duration);
+      if (additionalInfo != null) {
+        sb.append(" ").append(additionalInfo);
+      }
+      sb.append(">");
+      LOG.debug(sb.toString());
+    }
+    endMetrics(method);
+    return duration;
+  }
+
+  public Long getStartTime(String method) {
+    long startTime = 0L;
+    if (startTimes.containsKey(method)) {
+      startTime = startTimes.get(method);
+    }
+    return startTime;
+  }
+
+  public Long getEndTime(String method) {
+    long endTime = 0L;
+    if (endTimes.containsKey(method)) {
+      endTime = endTimes.get(method);
+    }
+    return endTime;
+  }
+
+  public boolean startTimeHasMethod(String method) {
+    return startTimes.containsKey(method);
+  }
+
+  public boolean endTimeHasMethod(String method) {
+    return endTimes.containsKey(method);
+  }
+
+  public Long getDuration(String method) {
+    long duration = 0;
+    if (startTimes.containsKey(method) && endTimes.containsKey(method)) {
+      duration = endTimes.get(method) - startTimes.get(method);
+    }
+    return duration;
+  }
+
+  public ImmutableMap<String, Long> getStartTimes() {
+    return ImmutableMap.copyOf(startTimes);
+  }
+
+  public ImmutableMap<String, Long> getEndTimes() {
+    return ImmutableMap.copyOf(endTimes);
+  }
+
+  // Methods for metrics integration. Each thread-local PerfLogger will open/close a timer
+  // scope during each perf-log method.
+  protected transient Map<String, Timer.Context> timerContexts = new HashMap<>();
+
+  private void beginMetrics(String method) {
+    Timer timer = Metrics.getOrCreateTimer(MetricsConstants.API_PREFIX + method);
+    if (timer != null) {
+      timerContexts.put(method, timer.time());
+    }
+  }
+
+  private void endMetrics(String method) {
+    Timer.Context context = timerContexts.remove(method);
+    if (context != null) {
+      context.close();
+    }
+  }
+
+  /**
+   * Cleans up any dangling perfLog metric call scopes.
+   */
+  public void cleanupPerfLogMetrics() {
+    for (Timer.Context context : timerContexts.values()) {
+      context.close();
+    }
+    timerContexts.clear();
+  }
+}
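[Tying this back to the RetryingHMSHandler hunk above, a minimal sketch of the begin/end pattern. The method name "get_table" is illustrative; in the handler it comes from method.getName().]

    import org.apache.hadoop.hive.metastore.metrics.PerfLogger;

    public class PerfLoggerSketch {
      public static void main(String[] args) throws Exception {
        PerfLogger perfLogger = PerfLogger.getPerfLogger(false);
        perfLogger.PerfLogBegin("PerfLoggerSketch", "get_table");
        try {
          Thread.sleep(50);  // stand-in for the real HMSHandler call
        } finally {
          long durationMs = perfLogger.PerfLogEnd("PerfLoggerSketch", "get_table");
          // The same interval is also recorded on the "api_get_table" timer in
          // the Metrics registry via beginMetrics/endMetrics above.
          System.out.println("get_table took " + durationMs + "ms");
        }
      }
    }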