diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/timeline/TimelineDataManagerMetrics.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/timeline/TimelineDataManagerMetrics.java index 3591b39..5e1aed8 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/timeline/TimelineDataManagerMetrics.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/timeline/TimelineDataManagerMetrics.java @@ -24,7 +24,11 @@ import org.apache.hadoop.metrics2.lib.MutableCounterLong; import org.apache.hadoop.metrics2.lib.MutableRate; -/** This class tracks metrics for the TimelineDataManager. */ +/** + * This class tracks metrics for the TimelineDataManager. + * Note: Only ATS v1 writes go through TimelineDataManager. Write related + * metrics does not include ATS v1.5 operations. + */ @Metrics(about="Metrics for TimelineDataManager", context="yarn") public class TimelineDataManagerMetrics { @Metric("getEntities calls") diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timeline-pluginstorage/src/main/java/org/apache/hadoop/yarn/server/timeline/EntityCacheItem.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timeline-pluginstorage/src/main/java/org/apache/hadoop/yarn/server/timeline/EntityCacheItem.java index 7eec7c3..fe88608 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timeline-pluginstorage/src/main/java/org/apache/hadoop/yarn/server/timeline/EntityCacheItem.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timeline-pluginstorage/src/main/java/org/apache/hadoop/yarn/server/timeline/EntityCacheItem.java @@ -84,14 +84,17 @@ public synchronized TimelineStore getStore() { * @param aclManager * @param jsonFactory * @param objMapper + * @param metrics * @return a {@link org.apache.hadoop.yarn.server.timeline.TimelineStore} * object filled with all entities in the group. * @throws IOException */ public synchronized TimelineStore refreshCache(TimelineEntityGroupId groupId, TimelineACLsManager aclManager, JsonFactory jsonFactory, - ObjectMapper objMapper) throws IOException { + ObjectMapper objMapper, EntityGroupFSTimelineStoreMetrics metrics) + throws IOException { if (needRefresh()) { + long startTime = Time.monotonicNow(); // If an application is not finished, we only update summary logs (and put // new entities into summary storage). // Otherwise, since the application is done, we can update detail logs. @@ -108,7 +111,7 @@ public synchronized TimelineStore refreshCache(TimelineEntityGroupId groupId, store.start(); } List removeList = new ArrayList<>(); - try(TimelineDataManager tdm = + try (TimelineDataManager tdm = new TimelineDataManager(store, aclManager)) { tdm.init(config); tdm.start(); @@ -133,6 +136,8 @@ public synchronized TimelineStore refreshCache(TimelineEntityGroupId groupId, appLogs.getDetailLogs().removeAll(removeList); } updateRefreshTimeToNow(); + metrics.incrCacheRefreshOps(); + metrics.addCacheRefreshTime(Time.monotonicNow() - startTime); } else { LOG.debug("Cache new enough, skip refreshing"); } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timeline-pluginstorage/src/main/java/org/apache/hadoop/yarn/server/timeline/EntityGroupFSTimelineStore.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timeline-pluginstorage/src/main/java/org/apache/hadoop/yarn/server/timeline/EntityGroupFSTimelineStore.java index 18b8951..bf7529e 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timeline-pluginstorage/src/main/java/org/apache/hadoop/yarn/server/timeline/EntityGroupFSTimelineStore.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timeline-pluginstorage/src/main/java/org/apache/hadoop/yarn/server/timeline/EntityGroupFSTimelineStore.java @@ -128,12 +128,17 @@ private List cacheIdPlugins; private Map cachedLogs; + @VisibleForTesting + @InterfaceAudience.Private + EntityGroupFSTimelineStoreMetrics metrics; + public EntityGroupFSTimelineStore() { super(EntityGroupFSTimelineStore.class.getSimpleName()); } @Override protected void serviceInit(Configuration conf) throws Exception { + metrics = EntityGroupFSTimelineStoreMetrics.create(); summaryStore = createSummaryStore(); addService(summaryStore); @@ -316,6 +321,7 @@ protected void serviceStop() throws Exception { @InterfaceAudience.Private @VisibleForTesting int scanActiveLogs() throws IOException { + long startTime = Time.monotonicNow(); RemoteIterator iter = list(activeRootPath); int logsToScanCount = 0; while (iter.hasNext()) { @@ -331,6 +337,7 @@ int scanActiveLogs() throws IOException { LOG.debug("Unable to parse entry {}", name); } } + metrics.addLogScanTime(Time.monotonicNow() - startTime); return logsToScanCount; } @@ -423,6 +430,7 @@ void cleanLogs(Path dirpath, FileSystem fs, long retainMillis) if (!fs.delete(dirpath, true)) { LOG.error("Unable to remove " + dirpath); } + metrics.incrLogsDirsCleaned(); } catch (IOException e) { LOG.error("Unable to remove " + dirpath, e); } @@ -588,6 +596,7 @@ public synchronized void parseSummaryLogs() throws IOException { @VisibleForTesting synchronized void parseSummaryLogs(TimelineDataManager tdm) throws IOException { + long startTime = Time.monotonicNow(); if (!isDone()) { LOG.debug("Try to parse summary log for log {} in {}", appId, appDirPath); @@ -605,8 +614,10 @@ synchronized void parseSummaryLogs(TimelineDataManager tdm) List removeList = new ArrayList(); for (LogInfo log : summaryLogs) { if (fs.exists(log.getPath(appDirPath))) { - log.parseForStore(tdm, appDirPath, isDone(), jsonFactory, + long summaryEntityParsed + = log.parseForStore(tdm, appDirPath, isDone(), jsonFactory, objMapper, fs); + metrics.incrEntitiesScannedToSummary(summaryEntityParsed); } else { // The log may have been removed, remove the log removeList.add(log); @@ -615,6 +626,7 @@ synchronized void parseSummaryLogs(TimelineDataManager tdm) } } summaryLogs.removeAll(removeList); + metrics.addSummaryDataReadTime(Time.monotonicNow() - startTime); } // scans for new logs and returns the modification timestamp of the @@ -787,6 +799,7 @@ public void run() { @Override public void run() { LOG.debug("Cleaner starting"); + long startTime = Time.monotonicNow(); try { cleanLogs(doneRootPath, fs, logRetainMillis); } catch (Exception e) { @@ -796,6 +809,8 @@ public void run() { } else { LOG.error("Error cleaning files", e); } + } finally { + metrics.addLogCleanTime(Time.monotonicNow() - startTime); } LOG.debug("Cleaner finished"); } @@ -824,11 +839,13 @@ void setCachedLogs(TimelineEntityGroupId groupId, EntityCacheItem cacheItem) { if (storeForId != null) { LOG.debug("Adding {} as a store for the query", storeForId.getName()); stores.add(storeForId); + metrics.incrGetEntityToDetailOps(); } } if (stores.size() == 0) { LOG.debug("Using summary store for {}", entityType); stores.add(this.summaryStore); + metrics.incrGetEntityToSummaryOps(); } return stores; } @@ -898,7 +915,7 @@ private TimelineStore getCachedStore(TimelineEntityGroupId groupId) AppLogs appLogs = cacheItem.getAppLogs(); LOG.debug("try refresh cache {} {}", groupId, appLogs.getAppId()); store = cacheItem.refreshCache(groupId, aclManager, jsonFactory, - objMapper); + objMapper, metrics); } else { LOG.warn("AppLogs for group id {} is null", groupId); } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timeline-pluginstorage/src/main/java/org/apache/hadoop/yarn/server/timeline/EntityGroupFSTimelineStoreMetrics.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timeline-pluginstorage/src/main/java/org/apache/hadoop/yarn/server/timeline/EntityGroupFSTimelineStoreMetrics.java new file mode 100644 index 0000000..6c7134f --- /dev/null +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timeline-pluginstorage/src/main/java/org/apache/hadoop/yarn/server/timeline/EntityGroupFSTimelineStoreMetrics.java @@ -0,0 +1,142 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.yarn.server.timeline; + +import org.apache.hadoop.metrics2.MetricsSystem; +import org.apache.hadoop.metrics2.annotation.Metric; +import org.apache.hadoop.metrics2.annotation.Metrics; +import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; +import org.apache.hadoop.metrics2.lib.MutableCounterLong; +import org.apache.hadoop.metrics2.lib.MutableRate; + +/** This class tracks metrics for the EntityGroupFSTimelineStore. */ +@Metrics(about="Metrics for EntityGroupFSTimelineStore", context="yarn") +public class EntityGroupFSTimelineStoreMetrics { + @Metric("entities scanned into the summary storage") + private MutableCounterLong entitiesScannedToSummary; + + @Metric("log cleaner dirs purged") + private MutableCounterLong logsDirsCleaned; + + @Metric("getEntity calls to summary storage") + private MutableCounterLong getEntityToSummaryOps; + + @Metric("getEntity calls to detail storage") + private MutableCounterLong getEntityToDetailOps; + + @Metric("cache storage refresh calls") + private MutableCounterLong cacheRefreshOps; + + @Metric("cache storage refresh time") + private MutableRate cacheRefreshTime; + + @Metric("summary data read time") + private MutableRate summaryDataReadTime; + + @Metric("active log scan time") + private MutableRate logScanTime; + + @Metric("log cleaner purging time") + private MutableRate logCleanTime; + + private static EntityGroupFSTimelineStoreMetrics instance = null; + + EntityGroupFSTimelineStoreMetrics() { + } + + public static synchronized EntityGroupFSTimelineStoreMetrics create() { + if (instance == null) { + MetricsSystem ms = DefaultMetricsSystem.instance(); + instance = ms.register(new EntityGroupFSTimelineStoreMetrics()); + } + return instance; + } + + public void incrEntitiesScannedToSummary(long delta) { + entitiesScannedToSummary.incr(delta); + } + + public void incrLogsDirsCleaned() { + logsDirsCleaned.incr(); + } + + public void incrGetEntityToSummaryOps() { + getEntityToSummaryOps.incr(); + } + + public void incrGetEntityToDetailOps() { + getEntityToDetailOps.incr(); + } + + public void incrCacheRefreshOps() { + cacheRefreshOps.incr(); + } + + public void addCacheRefreshTime(long msec) { + cacheRefreshTime.add(msec); + } + + public void addSummaryDataReadTime(long msec) { + summaryDataReadTime.add(msec); + } + + public void addLogScanTime(long msec) { + logScanTime.add(msec); + } + + public void addLogCleanTime(long msec) { + logCleanTime.add(msec); + } + + MutableCounterLong getEntitiesScannedToSummary() { + return entitiesScannedToSummary; + } + + MutableCounterLong getLogsDirsCleaned() { + return logsDirsCleaned; + } + + MutableCounterLong getGetEntityToSummaryOps() { + return getEntityToSummaryOps; + } + + MutableCounterLong getGetEntityToDetailOps() { + return getEntityToDetailOps; + } + + MutableCounterLong getCacheRefreshOps() { + return cacheRefreshOps; + } + + MutableRate getCacheRefreshTime() { + return cacheRefreshTime; + } + + MutableRate getSummaryDataReadTime() { + return summaryDataReadTime; + } + + MutableRate getLogScanTime() { + return logScanTime; + } + + MutableRate getLogCleanTime() { + return logCleanTime; + } +} + diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timeline-pluginstorage/src/main/java/org/apache/hadoop/yarn/server/timeline/LogInfo.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timeline-pluginstorage/src/main/java/org/apache/hadoop/yarn/server/timeline/LogInfo.java index bc80175..96bca8d 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timeline-pluginstorage/src/main/java/org/apache/hadoop/yarn/server/timeline/LogInfo.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timeline-pluginstorage/src/main/java/org/apache/hadoop/yarn/server/timeline/LogInfo.java @@ -98,13 +98,14 @@ boolean matchesGroupId(String groupId){ )); } - public void parseForStore(TimelineDataManager tdm, Path appDirPath, + public long parseForStore(TimelineDataManager tdm, Path appDirPath, boolean appCompleted, JsonFactory jsonFactory, ObjectMapper objMapper, FileSystem fs) throws IOException { LOG.debug("Parsing for log dir {} on attempt {}", appDirPath, attemptDirName); Path logPath = getPath(appDirPath); FileStatus status = fs.getFileStatus(logPath); + long numParsed = 0; if (status != null) { long startTime = Time.monotonicNow(); try { @@ -113,6 +114,7 @@ public void parseForStore(TimelineDataManager tdm, Path appDirPath, objMapper, fs); LOG.info("Parsed {} entities from {} in {} msec", count, logPath, Time.monotonicNow() - startTime); + numParsed += count; } catch (RuntimeException e) { // If AppLogs cannot parse this log, it may be corrupted or just empty if (e.getCause() instanceof JsonParseException && @@ -125,6 +127,7 @@ public void parseForStore(TimelineDataManager tdm, Path appDirPath, } else { LOG.warn("{} no longer exists. Skip for scanning. ", logPath); } + return numParsed; } private long parsePath(TimelineDataManager tdm, Path logPath, diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timeline-pluginstorage/src/test/java/org/apache/hadoop/yarn/server/timeline/TestEntityGroupFSTimelineStore.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timeline-pluginstorage/src/test/java/org/apache/hadoop/yarn/server/timeline/TestEntityGroupFSTimelineStore.java index 4e491fc..f2e328a 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timeline-pluginstorage/src/test/java/org/apache/hadoop/yarn/server/timeline/TestEntityGroupFSTimelineStore.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timeline-pluginstorage/src/test/java/org/apache/hadoop/yarn/server/timeline/TestEntityGroupFSTimelineStore.java @@ -26,6 +26,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.metrics2.lib.MutableCounterLong; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; @@ -177,12 +178,15 @@ public void testMoveToDone() throws Exception { @Test public void testParseSummaryLogs() throws Exception { TimelineDataManager tdm = PluginStoreTestUtils.getTdmWithMemStore(config); + MutableCounterLong scanned = store.metrics.getEntitiesScannedToSummary(); + long beforeScan = scanned.value(); EntityGroupFSTimelineStore.AppLogs appLogs = store.new AppLogs(TEST_APPLICATION_ID, testAppDirPath, AppState.COMPLETED); appLogs.scanForLogs(); appLogs.parseSummaryLogs(tdm); PluginStoreTestUtils.verifyTestEntities(tdm); + assertEquals(beforeScan + 2L, scanned.value()); } @Test @@ -227,6 +231,8 @@ public void testCleanLogs() throws Exception { fs.mkdirs(dirPathEmpty); // Should retain all logs after this run + MutableCounterLong dirsCleaned = store.metrics.getLogsDirsCleaned(); + long before = dirsCleaned.value(); store.cleanLogs(testDoneDirPath, fs, 10000); assertTrue(fs.exists(irrelevantDirPath)); assertTrue(fs.exists(irrelevantFilePath)); @@ -256,6 +262,7 @@ public void testCleanLogs() throws Exception { // appDirClean and appDirEmpty should be cleaned up assertFalse(fs.exists(appDirClean)); assertFalse(fs.exists(appDirEmpty)); + assertEquals(before + 2L, dirsCleaned.value()); } @Test @@ -272,6 +279,12 @@ public void testPluginRead() throws Exception { cacheItem.setAppLogs(appLogs); store.setCachedLogs( EntityGroupPlugInForTest.getStandardTimelineGroupId(), cacheItem); + MutableCounterLong detailLogEntityRead = + store.metrics.getGetEntityToDetailOps(); + MutableCounterLong cacheRefresh = store.metrics.getCacheRefreshOps(); + long numEntityReadBefore = detailLogEntityRead.value(); + long cacheRefreshBefore = cacheRefresh.value(); + // Generate TDM TimelineDataManager tdm = PluginStoreTestUtils.getTdmWithStore(config, store); @@ -290,6 +303,9 @@ public void testPluginRead() throws Exception { for (TimelineEntity entity : entities.getEntities()) { assertEquals(entityNew.getStartTime(), entity.getStartTime()); } + // Verify metrics + assertEquals(numEntityReadBefore + 2L, detailLogEntityRead.value()); + assertEquals(cacheRefreshBefore + 1L, cacheRefresh.value()); } @Test @@ -298,6 +314,9 @@ public void testSummaryRead() throws Exception { EntityGroupFSTimelineStore.AppLogs appLogs = store.new AppLogs(TEST_APPLICATION_ID, testAppDirPath, AppState.COMPLETED); + MutableCounterLong summaryLogEntityRead + = store.metrics.getGetEntityToSummaryOps(); + long numEntityReadBefore = summaryLogEntityRead.value(); TimelineDataManager tdm = PluginStoreTestUtils.getTdmWithStore(config, store); appLogs.scanForLogs(); @@ -313,6 +332,8 @@ public void testSummaryRead() throws Exception { for (TimelineEntity entity : entities.getEntities()) { assertEquals((Long) 123l, entity.getStartTime()); } + // Verify metrics + assertEquals(numEntityReadBefore + 5L, summaryLogEntityRead.value()); }