From 636d95e9cd63f3ce303d2504657f7aa12fde8479 Mon Sep 17 00:00:00 2001
From: Mohit Kataria <tihom88@gmail.com>
Date: Fri, 15 Mar 2019 15:28:25 +0530
Subject: [PATCH] OAK-8116: Expose text extraction metrics as Sling metrics

---
 .../lucene/LuceneIndexProviderService.java      |  6 +++---
 .../index/search/ExtractedTextCache.java        | 12 ++++++++++++
 .../spi/binary/FulltextBinaryTextExtractor.java | 17 ++++++++++++++++-
 3 files changed, 31 insertions(+), 4 deletions(-)

diff --git a/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderService.java b/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderService.java
index f4be7eda15..ac1ff789dc 100644
--- a/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderService.java
+++ b/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderService.java
@@ -374,7 +374,7 @@ public class LuceneIndexProviderService {
         whiteboard = new OsgiWhiteboard(bundleContext);
         threadPoolSize = PropertiesUtil.toInteger(config.get(PROP_THREAD_POOL_SIZE), PROP_THREAD_POOL_SIZE_DEFAULT);
         initializeIndexDir(bundleContext, config);
-        initializeExtractedTextCache(bundleContext, config);
+        initializeExtractedTextCache(bundleContext, config, statisticsProvider);
         tracker = createTracker(bundleContext, config);
         indexProvider = new LuceneIndexProvider(tracker, scorerFactory, augmentorFactory);
         initializeActiveBlobCollector(whiteboard, config);
@@ -680,7 +680,7 @@ public class LuceneIndexProviderService {
         log.debug("Lucene46Codec is loaded: {}", ensureLucene46CodecLoaded);
     }
 
-    private void initializeExtractedTextCache(BundleContext bundleContext, Map<String, ?> config) {
+    private void initializeExtractedTextCache(BundleContext bundleContext, Map<String, ?> config, StatisticsProvider statisticsProvider) {
         int cacheSizeInMB = PropertiesUtil.toInteger(config.get(PROP_EXTRACTED_TEXT_CACHE_SIZE),
                 PROP_EXTRACTED_TEXT_CACHE_SIZE_DEFAULT);
         int cacheExpiryInSecs = PropertiesUtil.toInteger(config.get(PROP_EXTRACTED_TEXT_CACHE_EXPIRY),
@@ -692,7 +692,7 @@ public class LuceneIndexProviderService {
                 cacheSizeInMB * ONE_MB,
                 cacheExpiryInSecs,
                 alwaysUsePreExtractedCache,
-                indexDir);
+                indexDir, statisticsProvider);
         if (extractedTextProvider != null){
             registerExtractedTextProvider(extractedTextProvider);
         }
diff --git a/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/ExtractedTextCache.java b/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/ExtractedTextCache.java
index 6330285af1..bd8e2cb556 100644
--- a/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/ExtractedTextCache.java
+++ b/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/ExtractedTextCache.java
@@ -47,6 +47,7 @@ import org.apache.jackrabbit.oak.plugins.index.fulltext.ExtractedText;
 import org.apache.jackrabbit.oak.plugins.index.fulltext.ExtractedText.ExtractionResult;
 import org.apache.jackrabbit.oak.plugins.index.fulltext.PreExtractedTextProvider;
 import org.apache.jackrabbit.oak.plugins.index.search.spi.editor.FulltextIndexEditor;
+import org.apache.jackrabbit.oak.stats.StatisticsProvider;
 import org.jetbrains.annotations.NotNull;
 import org.jetbrains.annotations.Nullable;
 import org.slf4j.Logger;
@@ -79,6 +80,7 @@ public class ExtractedTextCache {
     private long totalTextSize;
     private long totalTime;
     private int preFetchedCount;
+    private final StatisticsProvider statisticsProvider;
 
     // the actual cache. key: content id, value: extracted text
     private final Cache<String, String> cache;
@@ -97,6 +99,11 @@ public class ExtractedTextCache {
 
     public ExtractedTextCache(long maxWeight, long expiryTimeInSecs, boolean alwaysUsePreExtractedCache,
                               File indexDir) {
+        this(maxWeight, expiryTimeInSecs, alwaysUsePreExtractedCache, indexDir, null);
+    }
+
+    public ExtractedTextCache(long maxWeight, long expiryTimeInSecs, boolean alwaysUsePreExtractedCache,
+                              File indexDir, StatisticsProvider statisticsProvider) {
         if (maxWeight > 0) {
             cache = CacheBuilder.newBuilder()
                     .weigher(EmpiricalWeigher.INSTANCE)
@@ -114,6 +121,7 @@ public class ExtractedTextCache {
         this.timeoutMap = new ConcurrentHashMap<>();
         this.indexDir = indexDir;
         loadTimeoutMap();
+        this.statisticsProvider = statisticsProvider;
     }
 
     /**
@@ -191,6 +199,15 @@ public class ExtractedTextCache {
         this.totalTextSize += textLength;
     }
 
+    /**
+     * Returns the statistics provider supplied at construction time.
+     *
+     * @return the provider, or {@code null} when the cache was created without one
+     */
+    @Nullable
+    public StatisticsProvider getStatisticsProvider() {
+        return statisticsProvider;
+    }
+
     public TextExtractionStatsMBean getStatsMBean() {
         return new TextExtractionStatsMBean() {
             @Override
diff --git a/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/binary/FulltextBinaryTextExtractor.java b/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/binary/FulltextBinaryTextExtractor.java
index b753bcff3b..92c7ebfd40 100644
--- a/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/binary/FulltextBinaryTextExtractor.java
+++ b/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/binary/FulltextBinaryTextExtractor.java
@@ -25,6 +25,7 @@ import java.util.Collections;
 import java.util.List;
 import java.util.Set;
 import java.util.concurrent.Callable;
+import java.util.concurrent.TimeUnit;
 import java.util.concurrent.TimeoutException;
 
 import com.google.common.collect.Lists;
@@ -40,6 +41,9 @@ import org.apache.jackrabbit.oak.plugins.index.search.ExtractedTextCache;
 import org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition;
 import org.apache.jackrabbit.oak.plugins.index.search.spi.editor.FulltextIndexEditorContext;
 import org.apache.jackrabbit.oak.spi.state.NodeState;
+import org.apache.jackrabbit.oak.spi.whiteboard.Whiteboard;
+import org.apache.jackrabbit.oak.stats.StatsOptions;
+import org.apache.jackrabbit.oak.stats.TimerStats;
 import org.apache.tika.config.TikaConfig;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
@@ -60,6 +64,7 @@ import static org.apache.jackrabbit.oak.plugins.index.search.spi.editor.Fulltext
  *
  */
 public class FulltextBinaryTextExtractor {
+  private static final String TEXT_EXTRACTION_TIMER_METRIC_NAME = "TEXT_EXTRACTION_TIME";
 
   private static final Logger log = LoggerFactory.getLogger(FulltextBinaryTextExtractor.class);
   private static final Parser defaultParser = createDefaultParser();
@@ -70,6 +75,7 @@ public class FulltextBinaryTextExtractor {
   private final boolean reindex;
   private Parser parser;
   private TikaConfigHolder tikaConfig;
+  private TimerStats textExtractionTimerMetricStats;
   /**
    * The media types supported by the parser used.
    */
@@ -125,7 +131,20 @@ public class FulltextBinaryTextExtractor {
   private String parseStringValue(Blob v, Metadata metadata, String path, String propertyName) {
     String text = extractedTextCache.get(path, propertyName, v, reindex);
     if (text == null){
-      text = parseStringValue0(v, metadata, path);
+      if (extractedTextCache.getStatisticsProvider() != null) {
+        textExtractionTimerMetricStats = extractedTextCache.getStatisticsProvider().
+                getTimer(TEXT_EXTRACTION_TIMER_METRIC_NAME, StatsOptions.METRICS_ONLY);
+        TimerStats.Context context = textExtractionTimerMetricStats.time();
+        try {
+          text = parseStringValue0(v, metadata, path);
+        } finally {
+          // Always stop the timer, even if extraction throws.
+          context.stop();
+        }
+      } else {
+        // No statistics provider configured: extract without timing.
+        text = parseStringValue0(v, metadata, path);
+      }
     }
     return text;
   }
-- 
2.17.1

