From b6aad6d6f121063e3fbb9c5430dea39eaf428407 Mon Sep 17 00:00:00 2001 From: Ankit Agarwal Date: Wed, 25 May 2016 18:35:04 +0530 Subject: [PATCH] OAK-4401 Excerpt Highlighting for a property is not correct --- .../oak/query/fulltext/SimpleExcerptProvider.java | 32 ++++++++++++++++++++++ .../query/fulltext/SimpleExcerptProviderTest.java | 17 ++++++++++++ 2 files changed, 49 insertions(+) diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/SimpleExcerptProvider.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/SimpleExcerptProvider.java index 4359e91..18cab05 100644 --- a/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/SimpleExcerptProvider.java +++ b/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/SimpleExcerptProvider.java @@ -19,6 +19,7 @@ package org.apache.jackrabbit.oak.query.fulltext; import java.util.BitSet; import java.util.HashSet; import java.util.Set; +import java.util.regex.Pattern; import com.google.common.base.Splitter; import com.google.common.collect.ImmutableSet; @@ -244,6 +245,10 @@ public class SimpleExcerptProvider { } else { endIndex = text.length(); } + } else { + if ( !isCompleteWord(text, token, index, endIndex)) { + index = endIndex; + } } while (index < endIndex) { highlightBits.set(index++); @@ -251,6 +256,33 @@ public class SimpleExcerptProvider { } } + private static boolean isCompleteWord(String text, String token, int startIndex, int endIndex) { + String regex = null; + String subText = null; + + if (endIndex > text.length()) { + endIndex = text.length(); + } + if (endIndex == text.length()) { + regex = token; + } else { + regex = token + "[\\W]"; + endIndex = endIndex +1; + } + if (startIndex == 0) { + subText = text.substring(startIndex, endIndex); + } else { + subText = text.substring(startIndex - 1, endIndex); + regex = "[\\W]" + regex; + + } + + if (regex == null || subText == null) { + return false; + } + return Pattern.matches(regex, subText); + } + public static PropertyValue getExcerpt(PropertyValue value) { Splitter listSplitter = Splitter.on(',').trimResults().omitEmptyStrings(); StringBuilder excerpt = new StringBuilder(EXCERPT_BEGIN); diff --git a/oak-core/src/test/java/org/apache/jackrabbit/oak/query/fulltext/SimpleExcerptProviderTest.java b/oak-core/src/test/java/org/apache/jackrabbit/oak/query/fulltext/SimpleExcerptProviderTest.java index f941a7a..ff4ed38 100644 --- a/oak-core/src/test/java/org/apache/jackrabbit/oak/query/fulltext/SimpleExcerptProviderTest.java +++ b/oak-core/src/test/java/org/apache/jackrabbit/oak/query/fulltext/SimpleExcerptProviderTest.java @@ -59,6 +59,23 @@ public class SimpleExcerptProviderTest { } } + @Test + public void hightlightCompleteWordOnly() throws Exception { + + assertEquals("
official conflict of interest
", + highlight(sb("official conflict of interest"), of("of"))); + + assertEquals("
of to new city
", + highlight(sb("of to new city"), of("of"))); + + assertEquals("
out of the roof
", + highlight(sb("out of the roof"), of("of"))); + + assertEquals("
well this is of
", + highlight(sb("well this is of"), of("of"))); + + } + private static String randomString(Random r, String set) { int len = r.nextInt(10); StringBuilder buff = new StringBuilder(); -- 1.9.5 (Apple Git-50.3)