diff --git a/common/src/java/org/apache/hadoop/hive/common/metrics/metrics2/JvmMetrics.java b/common/src/java/org/apache/hadoop/hive/common/metrics/metrics2/JvmMetrics.java new file mode 100644 index 0000000..619c429 --- /dev/null +++ b/common/src/java/org/apache/hadoop/hive/common/metrics/metrics2/JvmMetrics.java @@ -0,0 +1,203 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.common.metrics.metrics2; + +import java.lang.management.ManagementFactory; +import java.lang.management.MemoryMXBean; +import java.lang.management.MemoryUsage; +import java.lang.management.ThreadInfo; +import java.lang.management.ThreadMXBean; +import java.lang.management.GarbageCollectorMXBean; +import java.util.List; +import java.util.concurrent.ConcurrentHashMap; + +import org.apache.hadoop.log.metrics.EventCounter; +import org.apache.hadoop.metrics2.MetricsCollector; +import org.apache.hadoop.metrics2.MetricsInfo; +import org.apache.hadoop.metrics2.MetricsRecordBuilder; +import org.apache.hadoop.metrics2.MetricsSource; +import org.apache.hadoop.metrics2.MetricsSystem; +import org.apache.hadoop.metrics2.lib.Interns; + +import org.apache.hadoop.hive.common.JvmPauseMonitor; + +import static org.apache.hadoop.hive.common.metrics.metrics2.JvmMetricsInfo.GcCount; +import static org.apache.hadoop.hive.common.metrics.metrics2.JvmMetricsInfo.GcNumInfoThresholdExceeded; +import static org.apache.hadoop.hive.common.metrics.metrics2.JvmMetricsInfo.GcNumWarnThresholdExceeded; +import static org.apache.hadoop.hive.common.metrics.metrics2.JvmMetricsInfo.GcTimeMillis; +import static org.apache.hadoop.hive.common.metrics.metrics2.JvmMetricsInfo.GcTotalExtraSleepTime; +import static org.apache.hadoop.hive.common.metrics.metrics2.JvmMetricsInfo.LogError; +import static org.apache.hadoop.hive.common.metrics.metrics2.JvmMetricsInfo.LogFatal; +import static org.apache.hadoop.hive.common.metrics.metrics2.JvmMetricsInfo.LogInfo; +import static org.apache.hadoop.hive.common.metrics.metrics2.JvmMetricsInfo.LogWarn; +import static org.apache.hadoop.hive.common.metrics.metrics2.JvmMetricsInfo.MemHeapCommittedM; +import static org.apache.hadoop.hive.common.metrics.metrics2.JvmMetricsInfo.MemHeapMaxM; +import static org.apache.hadoop.hive.common.metrics.metrics2.JvmMetricsInfo.MemHeapUsedM; +import static org.apache.hadoop.hive.common.metrics.metrics2.JvmMetricsInfo.MemMaxM; +import static org.apache.hadoop.hive.common.metrics.metrics2.JvmMetricsInfo.MemNonHeapCommittedM; +import static org.apache.hadoop.hive.common.metrics.metrics2.JvmMetricsInfo.MemNonHeapMaxM; +import static org.apache.hadoop.hive.common.metrics.metrics2.JvmMetricsInfo.MemNonHeapUsedM; +import static org.apache.hadoop.hive.common.metrics.metrics2.JvmMetricsInfo.ThreadsBlocked; +import static org.apache.hadoop.hive.common.metrics.metrics2.JvmMetricsInfo.ThreadsNew; +import static org.apache.hadoop.hive.common.metrics.metrics2.JvmMetricsInfo.ThreadsRunnable; +import static org.apache.hadoop.hive.common.metrics.metrics2.JvmMetricsInfo.ThreadsTerminated; +import static org.apache.hadoop.hive.common.metrics.metrics2.JvmMetricsInfo.ThreadsTimedWaiting; +import static org.apache.hadoop.hive.common.metrics.metrics2.JvmMetricsInfo.ThreadsWaiting; +import static org.apache.hadoop.hive.common.metrics.metrics2.MsInfo.ProcessName; +import static org.apache.hadoop.hive.common.metrics.metrics2.MsInfo.SessionId; + +/** + * Based on the JvmMetrics from Apache Hadoop. + */ +public class JvmMetrics implements MetricsSource { + + private static final float M = 1024*1024; + private static final float MEMORY_MAX_UNLIMITED_MB = -1; + + private final MemoryMXBean memoryMXBean = ManagementFactory.getMemoryMXBean(); + private final List gcBeans = + ManagementFactory.getGarbageCollectorMXBeans(); + private final ThreadMXBean threadMXBean = ManagementFactory.getThreadMXBean(); + private final String processName, sessionId; + private JvmPauseMonitor pauseMonitor = null; + private final ConcurrentHashMap gcInfoCache = + new ConcurrentHashMap<>(); + + private JvmMetrics(String processName, String sessionId) { + this.processName = processName; + this.sessionId = sessionId; + } + + public void setPauseMonitor(final JvmPauseMonitor pauseMonitor) { + this.pauseMonitor = pauseMonitor; + } + + public static JvmMetrics create(String processName, String sessionId, + MetricsSystem ms) { + return ms.register(JvmMetricsInfo.JvmMetrics.name(), + JvmMetricsInfo.JvmMetrics.description(), + new JvmMetrics(processName, sessionId)); + } + + @Override + public void getMetrics(MetricsCollector collector, boolean all) { + MetricsRecordBuilder rb = collector.addRecord(JvmMetricsInfo.JvmMetrics) + .setContext("jvm").tag(ProcessName, processName) + .tag(SessionId, sessionId); + getMemoryUsage(rb); + getGcUsage(rb); + getThreadUsage(rb); + getEventCounters(rb); + } + + private void getMemoryUsage(MetricsRecordBuilder rb) { + MemoryUsage memNonHeap = memoryMXBean.getNonHeapMemoryUsage(); + MemoryUsage memHeap = memoryMXBean.getHeapMemoryUsage(); + Runtime runtime = Runtime.getRuntime(); + rb.addGauge(MemNonHeapUsedM, memNonHeap.getUsed() / M) + .addGauge(MemNonHeapCommittedM, memNonHeap.getCommitted() / M) + .addGauge(MemNonHeapMaxM, calculateMaxMemoryUsage(memNonHeap)) + .addGauge(MemHeapUsedM, memHeap.getUsed() / M) + .addGauge(MemHeapCommittedM, memHeap.getCommitted() / M) + .addGauge(MemHeapMaxM, calculateMaxMemoryUsage(memHeap)) + .addGauge(MemMaxM, runtime.maxMemory() / M); + } + + private float calculateMaxMemoryUsage(MemoryUsage memHeap) { + long max = memHeap.getMax() ; + + if (max == -1) { + return MEMORY_MAX_UNLIMITED_MB; + } + + return max / M; + } + + private void getGcUsage(MetricsRecordBuilder rb) { + long count = 0; + long timeMillis = 0; + for (GarbageCollectorMXBean gcBean : gcBeans) { + long c = gcBean.getCollectionCount(); + long t = gcBean.getCollectionTime(); + MetricsInfo[] gcInfo = getGcInfo(gcBean.getName()); + rb.addCounter(gcInfo[0], c).addCounter(gcInfo[1], t); + count += c; + timeMillis += t; + } + rb.addCounter(GcCount, count) + .addCounter(GcTimeMillis, timeMillis); + + if (pauseMonitor != null) { + rb.addCounter(GcNumWarnThresholdExceeded, + pauseMonitor.getNumGcWarnThreadholdExceeded()); + rb.addCounter(GcNumInfoThresholdExceeded, + pauseMonitor.getNumGcInfoThresholdExceeded()); + rb.addCounter(GcTotalExtraSleepTime, + pauseMonitor.getTotalGcExtraSleepTime()); + } + } + + private MetricsInfo[] getGcInfo(String gcName) { + MetricsInfo[] gcInfo = gcInfoCache.get(gcName); + if (gcInfo == null) { + gcInfo = new MetricsInfo[2]; + gcInfo[0] = Interns.info("GcCount" + gcName, "GC Count for " + gcName); + gcInfo[1] = Interns + .info("GcTimeMillis" + gcName, "GC Time for " + gcName); + MetricsInfo[] previousGcInfo = gcInfoCache.putIfAbsent(gcName, gcInfo); + if (previousGcInfo != null) { + return previousGcInfo; + } + } + return gcInfo; + } + + private void getThreadUsage(MetricsRecordBuilder rb) { + int threadsNew = 0; + int threadsRunnable = 0; + int threadsBlocked = 0; + int threadsWaiting = 0; + int threadsTimedWaiting = 0; + int threadsTerminated = 0; + long threadIds[] = threadMXBean.getAllThreadIds(); + for (ThreadInfo threadInfo : threadMXBean.getThreadInfo(threadIds, 0)) { + if (threadInfo == null) continue; // race protection + switch (threadInfo.getThreadState()) { + case NEW: threadsNew++; break; + case RUNNABLE: threadsRunnable++; break; + case BLOCKED: threadsBlocked++; break; + case WAITING: threadsWaiting++; break; + case TIMED_WAITING: threadsTimedWaiting++; break; + case TERMINATED: threadsTerminated++; break; + } + } + rb.addGauge(ThreadsNew, threadsNew) + .addGauge(ThreadsRunnable, threadsRunnable) + .addGauge(ThreadsBlocked, threadsBlocked) + .addGauge(ThreadsWaiting, threadsWaiting) + .addGauge(ThreadsTimedWaiting, threadsTimedWaiting) + .addGauge(ThreadsTerminated, threadsTerminated); + } + + private void getEventCounters(MetricsRecordBuilder rb) { + rb.addCounter(LogFatal, EventCounter.getFatal()) + .addCounter(LogError, EventCounter.getError()) + .addCounter(LogWarn, EventCounter.getWarn()) + .addCounter(LogInfo, EventCounter.getInfo()); + } +} diff --git a/common/src/java/org/apache/hadoop/hive/common/metrics/metrics2/JvmMetricsInfo.java b/common/src/java/org/apache/hadoop/hive/common/metrics/metrics2/JvmMetricsInfo.java new file mode 100644 index 0000000..7a9bfa0 --- /dev/null +++ b/common/src/java/org/apache/hadoop/hive/common/metrics/metrics2/JvmMetricsInfo.java @@ -0,0 +1,47 @@ +package org.apache.hadoop.hive.common.metrics.metrics2; + +import com.google.common.base.Objects; + +import org.apache.hadoop.metrics2.MetricsInfo; + +/** + * Based on the JvmMetricsInfo from Apache Hadoop. + */ +enum JvmMetricsInfo implements MetricsInfo { + JvmMetrics("JVM related metrics etc."), // record info + // metrics + MemNonHeapUsedM("Non-heap memory used in MB"), + MemNonHeapCommittedM("Non-heap memory committed in MB"), + MemNonHeapMaxM("Non-heap memory max in MB"), + MemHeapUsedM("Heap memory used in MB"), + MemHeapCommittedM("Heap memory committed in MB"), + MemHeapMaxM("Heap memory max in MB"), + MemMaxM("Max memory size in MB"), + GcCount("Total GC count"), + GcTimeMillis("Total GC time in milliseconds"), + ThreadsNew("Number of new threads"), + ThreadsRunnable("Number of runnable threads"), + ThreadsBlocked("Number of blocked threads"), + ThreadsWaiting("Number of waiting threads"), + ThreadsTimedWaiting("Number of timed waiting threads"), + ThreadsTerminated("Number of terminated threads"), + LogFatal("Total number of fatal log events"), + LogError("Total number of error log events"), + LogWarn("Total number of warning log events"), + LogInfo("Total number of info log events"), + GcNumWarnThresholdExceeded("Number of times that the GC warn threshold is exceeded"), + GcNumInfoThresholdExceeded("Number of times that the GC info threshold is exceeded"), + GcTotalExtraSleepTime("Total GC extra sleep time in milliseconds"); + + private final String desc; + + JvmMetricsInfo(String desc) { this.desc = desc; } + + @Override public String description() { return desc; } + + @Override public String toString() { + return Objects.toStringHelper(this) + .add("name", name()).add("description", desc) + .toString(); + } +} diff --git a/common/src/java/org/apache/hadoop/hive/common/metrics/metrics2/MsInfo.java b/common/src/java/org/apache/hadoop/hive/common/metrics/metrics2/MsInfo.java new file mode 100644 index 0000000..cdce640 --- /dev/null +++ b/common/src/java/org/apache/hadoop/hive/common/metrics/metrics2/MsInfo.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.common.metrics.metrics2; + +import com.google.common.base.Objects; + +import org.apache.hadoop.metrics2.MetricsInfo; + +/** + * Based on the MsInfo from Apache Hadoop. + */ +public enum MsInfo implements MetricsInfo { + SessionId("Session ID"), + ProcessName("Process name"); + + private final String desc; + + MsInfo(String desc) { + this.desc = desc; + } + + @Override + public String description() { + return desc; + } + + @Override + public String toString() { + return Objects.toStringHelper(this) + .add("name", name()).add("description", desc) + .toString(); + } +} diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/LlapDaemon.java b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/LlapDaemon.java index 5457658..a7d5216 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/LlapDaemon.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/LlapDaemon.java @@ -31,6 +31,7 @@ import javax.management.ObjectName; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.common.JvmPauseMonitor; import org.apache.hadoop.hive.common.LogUtils; import org.apache.hadoop.hive.common.UgiFactory; import org.apache.hadoop.hive.conf.HiveConf; @@ -66,7 +67,6 @@ import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.service.CompositeService; import org.apache.hadoop.util.ExitUtil; -import org.apache.hadoop.util.JvmPauseMonitor; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.yarn.api.ApplicationConstants; import org.apache.hadoop.yarn.util.ConverterUtils; diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/metrics/LlapDaemonExecutorMetrics.java b/llap-server/src/java/org/apache/hadoop/hive/llap/metrics/LlapDaemonExecutorMetrics.java index 1110683..07f7717 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/metrics/LlapDaemonExecutorMetrics.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/metrics/LlapDaemonExecutorMetrics.java @@ -17,6 +17,8 @@ */ package org.apache.hadoop.hive.llap.metrics; +import static org.apache.hadoop.hive.common.metrics.metrics2.MsInfo.ProcessName; +import static org.apache.hadoop.hive.common.metrics.metrics2.MsInfo.SessionId; import static org.apache.hadoop.hive.llap.metrics.LlapDaemonExecutorInfo.ExecutorAvailableFreeSlots; import static org.apache.hadoop.hive.llap.metrics.LlapDaemonExecutorInfo.ExecutorAvailableFreeSlotsPercent; import static org.apache.hadoop.hive.llap.metrics.LlapDaemonExecutorInfo.ExecutorCacheMemoryPerInstance; @@ -41,8 +43,6 @@ import static org.apache.hadoop.hive.llap.metrics.LlapDaemonExecutorInfo.ExecutorTotalPreemptionTimeLost; import static org.apache.hadoop.hive.llap.metrics.LlapDaemonExecutorInfo.ExecutorTotalPreemptionTimeToKill; import static org.apache.hadoop.hive.llap.metrics.LlapDaemonExecutorInfo.ExecutorWaitQueueSize; -import static org.apache.hadoop.metrics2.impl.MsInfo.ProcessName; -import static org.apache.hadoop.metrics2.impl.MsInfo.SessionId; import java.lang.management.ManagementFactory; import java.lang.management.ThreadInfo; @@ -50,6 +50,7 @@ import java.util.Map; import java.util.concurrent.ConcurrentHashMap; +import org.apache.hadoop.hive.common.metrics.metrics2.JvmMetrics; import org.apache.hadoop.hive.llap.daemon.impl.ContainerRunnerImpl; import org.apache.hadoop.metrics2.MetricsCollector; import org.apache.hadoop.metrics2.MetricsInfo; @@ -63,7 +64,6 @@ import org.apache.hadoop.metrics2.lib.MutableGaugeInt; import org.apache.hadoop.metrics2.lib.MutableGaugeLong; import org.apache.hadoop.metrics2.lib.MutableQuantiles; -import org.apache.hadoop.metrics2.source.JvmMetrics; /** * Metrics about the llap daemon executors. diff --git a/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskSchedulerService.java b/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskSchedulerService.java index ad0cc5b..29f0696 100644 --- a/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskSchedulerService.java +++ b/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskSchedulerService.java @@ -53,6 +53,7 @@ import com.google.common.util.concurrent.Futures; import org.apache.commons.lang.mutable.MutableInt; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.common.JvmPauseMonitor; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.llap.metrics.LlapMetricsSystem; @@ -64,7 +65,6 @@ import org.apache.hadoop.hive.llap.tezplugins.helpers.MonotonicClock; import org.apache.hadoop.hive.llap.tezplugins.scheduler.LoggingFutureCallback; import org.apache.hadoop.hive.llap.tezplugins.metrics.LlapTaskSchedulerMetrics; -import org.apache.hadoop.util.JvmPauseMonitor; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.NodeId; diff --git a/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/metrics/LlapTaskSchedulerMetrics.java b/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/metrics/LlapTaskSchedulerMetrics.java index 04fd815..2c9477e 100644 --- a/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/metrics/LlapTaskSchedulerMetrics.java +++ b/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/metrics/LlapTaskSchedulerMetrics.java @@ -15,6 +15,8 @@ */ package org.apache.hadoop.hive.llap.tezplugins.metrics; +import static org.apache.hadoop.hive.common.metrics.metrics2.MsInfo.ProcessName; +import static org.apache.hadoop.hive.common.metrics.metrics2.MsInfo.SessionId; import static org.apache.hadoop.hive.llap.tezplugins.metrics.LlapTaskSchedulerInfo.SchedulerClusterNodeCount; import static org.apache.hadoop.hive.llap.tezplugins.metrics.LlapTaskSchedulerInfo.SchedulerCompletedDagCount; import static org.apache.hadoop.hive.llap.tezplugins.metrics.LlapTaskSchedulerInfo.SchedulerCpuCoresPerInstance; @@ -28,9 +30,8 @@ import static org.apache.hadoop.hive.llap.tezplugins.metrics.LlapTaskSchedulerInfo.SchedulerRunningTaskCount; import static org.apache.hadoop.hive.llap.tezplugins.metrics.LlapTaskSchedulerInfo.SchedulerSchedulableTaskCount; import static org.apache.hadoop.hive.llap.tezplugins.metrics.LlapTaskSchedulerInfo.SchedulerSuccessfulTaskCount; -import static org.apache.hadoop.metrics2.impl.MsInfo.ProcessName; -import static org.apache.hadoop.metrics2.impl.MsInfo.SessionId; +import org.apache.hadoop.hive.common.metrics.metrics2.JvmMetrics; import org.apache.hadoop.hive.llap.metrics.LlapMetricsSystem; import org.apache.hadoop.hive.llap.metrics.MetricsUtils; import org.apache.hadoop.metrics2.MetricsCollector; @@ -43,7 +44,6 @@ import org.apache.hadoop.metrics2.lib.MutableCounterInt; import org.apache.hadoop.metrics2.lib.MutableGaugeInt; import org.apache.hadoop.metrics2.lib.MutableGaugeLong; -import org.apache.hadoop.metrics2.source.JvmMetrics; /** * Metrics about the llap task scheduler.