Index: oak-core/src/main/java/org/apache/jackrabbit/oak/query/ResultRowImpl.java
===================================================================
--- oak-core/src/main/java/org/apache/jackrabbit/oak/query/ResultRowImpl.java (revision 1712931)
+++ oak-core/src/main/java/org/apache/jackrabbit/oak/query/ResultRowImpl.java (working copy)
@@ -35,18 +35,18 @@
private final Query query;
private final Tree[] trees;
-
+
/**
* The column values.
*/
private final PropertyValue[] values;
-
+
/**
* Whether the value at the given index is used for comparing rows (used
* within hashCode and equals). If null, all columns are distinct.
*/
private final boolean[] distinctValues;
-
+
/**
* The values used for ordering.
*/
@@ -59,7 +59,7 @@
this.distinctValues = distinctValues;
this.orderValues = orderValues;
}
-
+
PropertyValue[] getOrderValues() {
return orderValues;
}
@@ -107,15 +107,22 @@
}
// OAK-318:
// somebody might call rep:excerpt(text)
- // even thought the query doesn't contain that column
+ // even though the query doesn't contain that column
if (columnName.startsWith(QueryImpl.REP_EXCERPT)) {
- // missing excerpt, generate a default value
- String ex = SimpleExcerptProvider.getExcerpt(getPath(), columnName,
- query, true);
- if (ex != null) {
- return PropertyValues.newString(ex);
+            int columnIndex = query.getColumnIndex(QueryImpl.REP_EXCERPT);
+            // The two name checks are grouped on purpose: '&&' binds tighter than '||',
+            // so without the parentheses a "rep:excerpt(.)" request would read
+            // values[columnIndex] even when columnIndex is negative, i.e. when the
+            // query has no rep:excerpt column at all.
+            if (columnIndex >= 0 && (QueryImpl.REP_EXCERPT.equals(columnName)
+                    || SimpleExcerptProvider.REP_EXCERPT_FN.equals(columnName))) {
+                // the index already supplied an excerpt for this row, just wrap it
+                return SimpleExcerptProvider.getExcerpt(values[columnIndex]);
+                // TODO : make it possible to extract property level excerpts, e.g. rep:excerpt(text) from indexes
+            } else {
+                // missing excerpt, generate a default value
+                String ex = SimpleExcerptProvider.getExcerpt(getPath(), columnName,
+                        query, true);
+                if (ex != null) {
+                    return PropertyValues.newString(ex);
+                }
+                // no excerpt could be generated: fall back to the row's path
+                return PropertyValues.newString(getPath());
+            }
- return PropertyValues.newString(getPath());
}
throw new IllegalArgumentException("Column not found: " + columnName);
}
@@ -146,8 +153,8 @@
}
return buff.toString();
}
-
+
@Override
public int hashCode() {
int result = 1;
@@ -155,7 +162,7 @@
result = 31 * result + hashCodeOfValues();
return result;
}
-
+
private int hashCodeOfValues() {
int result = 1;
for (int i = 0; i < values.length; i++) {
@@ -249,4 +256,4 @@
}
-}
+}
\ No newline at end of file
Index: oak-core/src/main/java/org/apache/jackrabbit/oak/query/ast/NotImpl.java
===================================================================
--- oak-core/src/main/java/org/apache/jackrabbit/oak/query/ast/NotImpl.java (revision 1712931)
+++ oak-core/src/main/java/org/apache/jackrabbit/oak/query/ast/NotImpl.java (working copy)
@@ -18,7 +18,6 @@
*/
package org.apache.jackrabbit.oak.query.ast;
-import static com.google.common.collect.Lists.newArrayList;
import static org.apache.jackrabbit.oak.query.ast.AstElementFactory.copyElementAndCheckReference;
import java.util.Collections;
Index: oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/SimpleExcerptProvider.java
===================================================================
--- oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/SimpleExcerptProvider.java (revision 1712931)
+++ oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/SimpleExcerptProvider.java (working copy)
@@ -16,13 +16,16 @@
*/
package org.apache.jackrabbit.oak.query.fulltext;
-import static org.apache.jackrabbit.util.Text.encodeIllegalXMLCharacters;
-
import java.util.BitSet;
import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
import java.util.Set;
+import com.google.common.base.Splitter;
+import com.google.common.collect.ImmutableSet;
import org.apache.jackrabbit.oak.api.PropertyState;
+import org.apache.jackrabbit.oak.api.PropertyValue;
import org.apache.jackrabbit.oak.api.Tree;
import org.apache.jackrabbit.oak.api.Type;
import org.apache.jackrabbit.oak.commons.PathUtils;
@@ -33,20 +36,24 @@
import org.apache.jackrabbit.oak.query.ast.FullTextSearchImpl;
import org.apache.jackrabbit.oak.query.ast.LiteralImpl;
import org.apache.jackrabbit.oak.query.ast.OrImpl;
+import org.apache.jackrabbit.oak.spi.query.PropertyValues;
-import com.google.common.collect.ImmutableSet;
+import static com.google.common.collect.Maps.newHashMap;
+import static org.apache.jackrabbit.util.Text.encodeIllegalXMLCharacters;
/**
* This class can extract excerpts from node.
*/
public class SimpleExcerptProvider {
- private static final String REP_EXCERPT_FN = "rep:excerpt(.)";
+    /** Column name of the excerpt function applied to the whole node ({@code .}). */
+    public static final String REP_EXCERPT_FN = "rep:excerpt(.)";
+    /** Markup appended after the text of every generated excerpt. */
+    public static final String EXCERPT_END = "</span></div>";
+    /** Markup that every generated excerpt starts with. */
+    public static final String EXCERPT_BEGIN = "<div><span>";
private static int maxFragmentSize = 150;
public static String getExcerpt(String path, String columnName,
- Query query, boolean highlight) {
+ Query query, boolean highlight) {
if (path == null) {
return null;
}
@@ -72,7 +79,7 @@
for (PropertyState p : t.getProperties()) {
if (p.getType().tag() == Type.STRING.tag()
&& (columnName == null || columnName.equalsIgnoreCase(p
- .getName()))) {
+ .getName()))) {
text.append(separator);
separator = " ";
for (String v : p.getValue(Type.STRINGS)) {
@@ -82,8 +89,7 @@
}
Set searchToken = extractFulltext(query);
if (highlight && searchToken != null) {
- String h = highlight(text, searchToken);
- return h;
+ return highlight(text, searchToken);
}
return noHighlight(text);
}
@@ -140,32 +146,32 @@
Set out = new HashSet();
StringBuilder token = new StringBuilder();
boolean quote = false;
- for (int i = 0; i < in.length();) {
+ for (int i = 0; i < in.length(); ) {
final int c = in.codePointAt(i);
int length = Character.charCount(c);
switch (c) {
- case ' ':
- if (quote) {
- token.append(' ');
- } else if (token.length() > 0) {
- out.add(token.toString());
- token = new StringBuilder();
- }
- break;
- case '"':
- case '\'':
- if (quote) {
- quote = false;
- if (token.length() > 0) {
+ case ' ':
+ if (quote) {
+ token.append(' ');
+ } else if (token.length() > 0) {
out.add(token.toString());
token = new StringBuilder();
}
- } else {
- quote = true;
- }
- break;
- default:
- token.append(new String(Character.toChars(c)));
+ break;
+ case '"':
+ case '\'':
+ if (quote) {
+ quote = false;
+ if (token.length() > 0) {
+ out.add(token.toString());
+ token = new StringBuilder();
+ }
+ } else {
+ quote = true;
+ }
+ break;
+ default:
+ token.append(new String(Character.toChars(c)));
}
i += length;
}
@@ -198,7 +204,7 @@
for (String token : tokens) {
highlight(escaped, highlight, token);
}
- StringBuilder excerpt = new StringBuilder("");
+ StringBuilder excerpt = new StringBuilder(EXCERPT_BEGIN);
boolean strong = false;
for (int i = 0; i < escaped.length(); i++) {
if (highlight.get(i) && !strong) {
@@ -213,10 +219,10 @@
if (strong) {
excerpt.append("");
}
- excerpt.append("
");
+ excerpt.append(EXCERPT_END);
return excerpt.toString();
}
-
+
private static void highlight(String text, BitSet highlightBits, String token) {
boolean isLike = false;
if (token.endsWith("*")) {
@@ -247,5 +253,55 @@
}
}
}
-
+
+    /**
+     * Wraps an already computed excerpt value in the excerpt markup.
+     * <p>
+     * The string form of {@code value} is split on commas; every piece is trimmed,
+     * empty pieces are dropped, and the remaining pieces are concatenated (without
+     * any separator) between {@link #EXCERPT_BEGIN} and {@link #EXCERPT_END}.
+     *
+     * @param value the excerpt value to wrap; its {@code toString()} form is used
+     * @return a string property value holding the wrapped excerpt
+     */
+    public static PropertyValue getExcerpt(PropertyValue value) {
+        StringBuilder wrapped = new StringBuilder(EXCERPT_BEGIN);
+        Splitter commaSplitter = Splitter.on(',').trimResults().omitEmptyStrings();
+        for (String piece : commaSplitter.splitToList(value.toString())) {
+            wrapped.append(piece);
+        }
+        return PropertyValues.newString(wrapped.append(EXCERPT_END).toString());
+    }
+
+    /**
+     * Builds an excerpt for a specific column from an index-provided value.
+     * <p>
+     * The string form of {@code value} is split on commas. Each piece has its first
+     * and last character removed (presumably enclosing delimiters - TODO confirm the
+     * exact format produced by the index) and is then parsed as {@code key:value}
+     * pairs. The values of the first piece that yields a non-empty value list are
+     * appended between {@link #EXCERPT_BEGIN} and {@link #EXCERPT_END}; later pieces
+     * are ignored.
+     *
+     * @param columnName the requested column; the property to look up is derived
+     *                   from it via {@code extractExcerptProperty}
+     * @param value      the index-provided value; its {@code toString()} form is parsed
+     * @return a string property value holding the wrapped excerpt (only the markup
+     *         if no piece contributed any value)
+     */
+    public static PropertyValue getExcerpt(String columnName, PropertyValue value) {
+        StringBuilder excerpt = new StringBuilder(EXCERPT_BEGIN);
+        String property = extractExcerptProperty(columnName);
+
+        Splitter listSplitter = Splitter.on(',').trimResults().omitEmptyStrings();
+        List<String> values = listSplitter.splitToList(value.toString());
+        for (String rev : values) {
+            if (rev.length() < 2) {
+                // too short to carry both enclosing characters; substring(1, length - 1)
+                // would throw for a single-character piece
+                continue;
+            }
+            String substring = rev.substring(1, rev.length() - 1);
+            Map<String, String> m1 = listSplitter.withKeyValueSeparator(':')
+                    .split(substring);
+            Map<String, List<String>> m2 = newHashMap();
+            for (Map.Entry<String, String> entry : m1.entrySet()) {
+                m2.put(entry.getKey(), listSplitter.splitToList(entry.getValue()));
+            }
+            if (m2.isEmpty()) {
+                continue;
+            }
+            // with a known property use its values, otherwise whatever entry the map yields first
+            List<String> propertyValues = property != null
+                    ? m2.get(property)
+                    : m2.values().iterator().next();
+            if (addValues(excerpt, propertyValues)) {
+                break;
+            }
+        }
+        excerpt.append(EXCERPT_END);
+        return PropertyValues.newString(excerpt.toString());
+    }
+
+    /**
+     * Appends all given values, in order and without separator, to the excerpt.
+     *
+     * @param excerpt        the buffer to append to
+     * @param propertyValues the values to append; may be {@code null}
+     * @return {@code true} if {@code propertyValues} was non-null and non-empty
+     *         (so its entries were appended), {@code false} otherwise
+     */
+    private static boolean addValues(StringBuilder excerpt, List<String> propertyValues) {
+        if (propertyValues == null || propertyValues.isEmpty()) {
+            return false;
+        }
+        for (String pv : propertyValues) {
+            excerpt.append(pv);
+        }
+        return true;
+    }
}
Index: oak-lucene/pom.xml
===================================================================
--- oak-lucene/pom.xml (revision 1712931)
+++ oak-lucene/pom.xml (working copy)
@@ -42,6 +42,9 @@
org.apache.jackrabbit.core.query.ExcerptTest#testPunctuationStartsFragment
org.apache.jackrabbit.core.query.ExcerptTest#testPunctuationStartsFragmentEndsWithDots
org.apache.jackrabbit.core.query.ExcerptTest#testPreferPhrase
+ org.apache.jackrabbit.core.query.ExcerptTest#testQuotedPhrase
+ org.apache.jackrabbit.core.query.ExcerptTest#testHighlightJa
+ org.apache.jackrabbit.core.query.ExcerptTest#testEncodeIllegalCharsHighlights
org.apache.jackrabbit.core.query.QueryResultTest#testSkip
org.apache.jackrabbit.core.query.DerefTest#testDeref
org.apache.jackrabbit.core.query.DerefTest#testDerefInPredicate
@@ -206,6 +209,11 @@
${lucene.version}
provided
+
+ org.apache.lucene
+ lucene-highlighter
+ ${lucene.version}
+
Index: oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java
===================================================================
--- oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java (revision 1712931)
+++ oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java (working copy)
@@ -73,12 +73,14 @@
import org.apache.jackrabbit.oak.spi.query.QueryIndex.AdvanceFulltextQueryIndex;
import org.apache.jackrabbit.oak.spi.state.NodeState;
import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.index.Term;
@@ -98,6 +100,12 @@
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TotalHitCountCollector;
import org.apache.lucene.search.WildcardQuery;
+import org.apache.lucene.search.highlight.Highlighter;
+import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
+import org.apache.lucene.search.highlight.QueryScorer;
+import org.apache.lucene.search.highlight.SimpleHTMLEncoder;
+import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
+import org.apache.lucene.search.highlight.TextFragment;
import org.apache.lucene.search.spell.SuggestWord;
import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.util.Version;
@@ -172,6 +180,9 @@
private final NodeAggregator aggregator;
+    /**
+     * Highlighter used to build excerpts: matched terms are wrapped in
+     * {@code <strong>} tags and the surrounding text is HTML-encoded. It is created
+     * without a fragment scorer (the {@code null} argument).
+     */
+    private final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter("<strong>", "</strong>"),
+            new SimpleHTMLEncoder(), null);
+
public LuceneIndex(IndexTracker tracker, NodeAggregator aggregator) {
this.tracker = tracker;
this.aggregator = aggregator;
@@ -298,7 +309,7 @@
return endOfData();
}
- private LuceneResultRow convertToRow(ScoreDoc doc, IndexSearcher searcher) throws IOException {
+ private LuceneResultRow convertToRow(ScoreDoc doc, IndexSearcher searcher, String excerpt) throws IOException {
IndexReader reader = searcher.getIndexReader();
PathStoredFieldVisitor visitor = new PathStoredFieldVisitor();
reader.document(doc.doc, visitor);
@@ -323,7 +334,7 @@
seenPaths.add(path);
}
- return new LuceneResultRow(path, doc.score);
+ return new LuceneResultRow(path, doc.score, excerpt);
}
return null;
}
@@ -363,8 +374,14 @@
LOG.debug("... took {} ms", time);
nextBatchSize = (int) Math.min(nextBatchSize * 2L, 100000);
+ boolean addExcerpt = filter.getQueryStatement() != null && filter.getQueryStatement().contains(QueryImpl.REP_EXCERPT);
for (ScoreDoc doc : docs.scoreDocs) {
- LuceneResultRow row = convertToRow(doc, searcher);
+ String excerpt = null;
+ if (addExcerpt) {
+ excerpt = getExcerpt(indexNode, searcher, query, doc);
+ }
+
+ LuceneResultRow row = convertToRow(doc, searcher, excerpt);
if (row != null) {
queue.add(row);
}
@@ -476,6 +493,35 @@
return new LucenePathCursor(itr, settings, sizeEstimator);
}
+    /**
+     * Builds a highlighted excerpt for a single search hit.
+     * <p>
+     * Every field returned for the hit's document, except the suggest field, is
+     * re-analyzed with the index definition's analyzer and handed to the highlighter,
+     * which is asked for at most two fragments per field. All fragments are
+     * concatenated, separated by {@code "..."}.
+     *
+     * @param indexNode the index node whose definition supplies the analyzer
+     * @param searcher  the searcher used to load the hit's document
+     * @param query     the Lucene query used to score the fragments
+     * @param doc       the hit to build the excerpt for
+     * @return the concatenated fragments; an empty string if there were none
+     * @throws IOException if reading the document or a token stream fails
+     */
+    private String getExcerpt(IndexNode indexNode, IndexSearcher searcher, Query query, ScoreDoc doc) throws IOException {
+        StringBuilder excerpt = new StringBuilder();
+        QueryScorer scorer = new QueryScorer(query);
+        scorer.setExpandMultiTermQuery(true);
+        // NOTE(review): the highlighter is an instance field that is reconfigured on
+        // every call - confirm this method is never invoked concurrently
+        highlighter.setFragmentScorer(scorer);
+
+        // the analyzer does not depend on the field, so look it up once
+        Analyzer analyzer = indexNode.getDefinition().getAnalyzer();
+        for (IndexableField field : searcher.getIndexReader().document(doc.doc).getFields()) {
+            String text = field.stringValue();
+            if (FieldNames.SUGGEST.equals(field.name()) || text == null) {
+                // suggest data is not excerpt material, and a field without a
+                // string value cannot be tokenized
+                continue;
+            }
+            try {
+                TokenStream tokenStream = analyzer.tokenStream(field.name(), text);
+                tokenStream.reset();
+                CachingTokenFilter cachingTokenFilter = new CachingTokenFilter(tokenStream);
+                TextFragment[] textFragments = highlighter.getBestTextFragments(cachingTokenFilter, text, true, 2);
+                if (textFragments != null) {
+                    for (TextFragment fragment : textFragments) {
+                        if (excerpt.length() > 0) {
+                            excerpt.append("...");
+                        }
+                        excerpt.append(fragment.toString());
+                    }
+                }
+            } catch (InvalidTokenOffsetsException e) {
+                // best effort: a field that cannot be highlighted must not fail the query
+                LOG.error("highlighting failed", e);
+            }
+        }
+        return excerpt.toString();
+    }
+
protected static IndexPlan.Builder planBuilder(Filter filter){
return new IndexPlan.Builder()
.setCostPerExecution(0) // we're local. Low-cost
@@ -1039,11 +1085,13 @@
final double score;
final Iterable suggestWords;
final boolean isVirtual;
+ final String excerpt;
- LuceneResultRow(String path, double score) {
+ LuceneResultRow(String path, double score, String excerpt) {
this.isVirtual = false;
this.path = path;
this.score = score;
+ this.excerpt = excerpt;
this.suggestWords = Collections.emptySet();
}
@@ -1052,6 +1100,7 @@
this.path = "/";
this.score = 1.0d;
this.suggestWords = suggestWords;
+ this.excerpt = null;
}
@Override
@@ -1130,6 +1179,9 @@
if (QueryImpl.REP_SPELLCHECK.equals(columnName) || QueryImpl.REP_SUGGEST.equals(columnName)) {
return PropertyValues.newString(Iterables.toString(currentRow.suggestWords));
}
+ if (QueryImpl.REP_EXCERPT.equals(columnName)) {
+ return PropertyValues.newString(currentRow.excerpt);
+ }
return pathRow.getValue(columnName);
}
Index: oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndex.java
===================================================================
--- oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndex.java (revision 1712931)
+++ oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndex.java (working copy)
@@ -18,6 +18,10 @@
*/
package org.apache.jackrabbit.oak.plugins.index.lucene;
+import javax.annotation.CheckForNull;
+import javax.annotation.Nonnull;
+import javax.annotation.Nullable;
+import javax.jcr.PropertyType;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
@@ -28,17 +32,11 @@
import java.util.Set;
import java.util.concurrent.atomic.AtomicReference;
-import javax.annotation.CheckForNull;
-import javax.annotation.Nonnull;
-import javax.annotation.Nullable;
-import javax.jcr.PropertyType;
-
import com.google.common.collect.AbstractIterator;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Queues;
import com.google.common.collect.Sets;
-
import org.apache.jackrabbit.oak.api.PropertyValue;
import org.apache.jackrabbit.oak.api.Result.SizePrecision;
import org.apache.jackrabbit.oak.api.Type;
@@ -70,10 +68,13 @@
import org.apache.jackrabbit.oak.spi.state.NodeState;
import org.apache.jackrabbit.oak.util.PerfLogger;
import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.CachingTokenFilter;
+import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.index.Term;
@@ -98,6 +99,12 @@
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TotalHitCountCollector;
import org.apache.lucene.search.WildcardQuery;
+import org.apache.lucene.search.highlight.Highlighter;
+import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
+import org.apache.lucene.search.highlight.QueryScorer;
+import org.apache.lucene.search.highlight.SimpleHTMLEncoder;
+import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
+import org.apache.lucene.search.highlight.TextFragment;
import org.apache.lucene.search.spell.SuggestWord;
import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.util.Version;
@@ -114,6 +121,7 @@
import static org.apache.jackrabbit.oak.commons.PathUtils.denotesRoot;
import static org.apache.jackrabbit.oak.commons.PathUtils.getParentPath;
import static org.apache.jackrabbit.oak.plugins.index.lucene.FieldNames.PATH;
+import static org.apache.jackrabbit.oak.plugins.index.lucene.FieldNames.SUGGEST;
import static org.apache.jackrabbit.oak.plugins.index.lucene.IndexDefinition.NATIVE_SORT_ORDER;
import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.VERSION;
import static org.apache.jackrabbit.oak.plugins.index.lucene.TermFactory.newAncestorTerm;
@@ -121,9 +129,7 @@
import static org.apache.jackrabbit.oak.query.QueryImpl.JCR_PATH;
import static org.apache.jackrabbit.oak.spi.query.QueryIndex.AdvancedQueryIndex;
import static org.apache.jackrabbit.oak.spi.query.QueryIndex.NativeQueryIndex;
-import static org.apache.lucene.search.BooleanClause.Occur.MUST;
-import static org.apache.lucene.search.BooleanClause.Occur.MUST_NOT;
-import static org.apache.lucene.search.BooleanClause.Occur.SHOULD;
+import static org.apache.lucene.search.BooleanClause.Occur.*;
/**
* Provides a QueryIndex that does lookups against a Lucene-based index
@@ -166,7 +172,7 @@
*/
public class LucenePropertyIndex implements AdvancedQueryIndex, QueryIndex, NativeQueryIndex,
AdvanceFulltextQueryIndex {
-
+
private static double MIN_COST = 2.1;
private static final Logger LOG = LoggerFactory
@@ -185,6 +191,9 @@
private final ScorerProviderFactory scorerProviderFactory;
+    /**
+     * Highlighter used to build excerpts: matched terms are wrapped in
+     * {@code <strong>} tags and the surrounding text is HTML-encoded. It is created
+     * without a fragment scorer (the {@code null} argument).
+     */
+    private final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter("<strong>", "</strong>"),
+            new SimpleHTMLEncoder(), null);
+
public LucenePropertyIndex(IndexTracker tracker) {
this.tracker = tracker;
this.scorerProviderFactory = ScorerProviderFactory.DEFAULT;
@@ -253,7 +262,7 @@
.append(path)
.append(") ");
sb.append(getLuceneRequest(plan, null));
- if(plan.getSortOrder() != null && !plan.getSortOrder().isEmpty()){
+ if (plan.getSortOrder() != null && !plan.getSortOrder().isEmpty()) {
sb.append(" ordering:").append(plan.getSortOrder());
}
if (ft != null) {
@@ -292,7 +301,7 @@
return endOfData();
}
- private LuceneResultRow convertToRow(ScoreDoc doc, IndexSearcher searcher) throws IOException {
+ private LuceneResultRow convertToRow(ScoreDoc doc, IndexSearcher searcher, String excerpt) throws IOException {
IndexReader reader = searcher.getIndexReader();
//TODO Look into usage of field cache for retrieving the path
//instead of reading via reader if no of docs in index are limited
@@ -307,13 +316,13 @@
String originalPath = path;
path = pr.transformPath(path);
- if (path == null){
+ if (path == null) {
LOG.trace("Ignoring path {} : Transformation returned null", originalPath);
return null;
}
// avoid duplicate entries
- if (seenPaths.contains(path)){
+ if (seenPaths.contains(path)) {
LOG.trace("Ignoring path {} : Duplicate post transformation", originalPath);
return null;
}
@@ -321,7 +330,7 @@
}
LOG.trace("Matched path {}", path);
- return new LuceneResultRow(path, doc.score);
+ return new LuceneResultRow(path, doc.score, excerpt);
}
return null;
}
@@ -338,7 +347,7 @@
ScoreDoc lastDocToRecord = null;
- IndexNode indexNode = acquireIndexNode(plan);
+ final IndexNode indexNode = acquireIndexNode(plan);
checkState(indexNode != null);
try {
IndexSearcher searcher = indexNode.getSearcher();
@@ -375,8 +384,14 @@
PERF_LOGGER.end(start, -1, "{} ...", docs.scoreDocs.length);
nextBatchSize = (int) Math.min(nextBatchSize * 2L, 100000);
+ boolean addExcerpt = filter.getQueryStatement() != null && filter.getQueryStatement().contains(QueryImpl.REP_EXCERPT);
for (ScoreDoc doc : docs.scoreDocs) {
- LuceneResultRow row = convertToRow(doc, searcher);
+ String excerpt = null;
+ if (addExcerpt) {
+ excerpt = getExcerpt(indexNode, searcher, query, doc);
+ }
+
+ LuceneResultRow row = convertToRow(doc, searcher, excerpt);
if (row != null) {
queue.add(row);
}
@@ -454,7 +469,7 @@
private void checkForIndexVersionChange(IndexSearcher searcher) {
long currentVersion = getVersion(searcher);
- if (currentVersion != lastSearchIndexerVersion && lastDoc != null){
+ if (currentVersion != lastSearchIndexerVersion && lastDoc != null) {
lastDoc = null;
LOG.debug("Change in index version detected {} => {}. Query would be performed without " +
"offset", currentVersion, lastSearchIndexerVersion);
@@ -474,7 +489,7 @@
Query query = (Query) luceneRequestFacade.getLuceneRequest();
TotalHitCountCollector collector = new TotalHitCountCollector();
searcher.search(query, collector);
- int totalHits = collector.getTotalHits();
+ int totalHits = collector.getTotalHits();
LOG.debug("Estimated size for query {} is {}", query, totalHits);
return totalHits;
}
@@ -490,6 +505,35 @@
return new LucenePathCursor(itr, plan, settings, sizeEstimator);
}
+    /**
+     * Builds a highlighted excerpt for a single search hit.
+     * <p>
+     * Every field returned for the hit's document, except the suggest field, is
+     * re-analyzed with the index definition's analyzer and handed to the highlighter,
+     * which is asked for at most two fragments per field. All fragments are
+     * concatenated, separated by {@code "..."}.
+     *
+     * @param indexNode the index node whose definition supplies the analyzer
+     * @param searcher  the searcher used to load the hit's document
+     * @param query     the Lucene query used to score the fragments
+     * @param doc       the hit to build the excerpt for
+     * @return the concatenated fragments; an empty string if there were none
+     * @throws IOException if reading the document or a token stream fails
+     */
+    private String getExcerpt(IndexNode indexNode, IndexSearcher searcher, Query query, ScoreDoc doc) throws IOException {
+        StringBuilder excerpt = new StringBuilder();
+        QueryScorer scorer = new QueryScorer(query);
+        scorer.setExpandMultiTermQuery(true);
+        // NOTE(review): the highlighter is an instance field that is reconfigured on
+        // every call - confirm this method is never invoked concurrently
+        highlighter.setFragmentScorer(scorer);
+
+        // the analyzer does not depend on the field, so look it up once
+        Analyzer analyzer = indexNode.getDefinition().getAnalyzer();
+        for (IndexableField field : searcher.getIndexReader().document(doc.doc).getFields()) {
+            String text = field.stringValue();
+            if (SUGGEST.equals(field.name()) || text == null) {
+                // suggest data is not excerpt material, and a field without a
+                // string value cannot be tokenized
+                continue;
+            }
+            try {
+                TokenStream tokenStream = analyzer.tokenStream(field.name(), text);
+                tokenStream.reset();
+                CachingTokenFilter cachingTokenFilter = new CachingTokenFilter(tokenStream);
+                TextFragment[] textFragments = highlighter.getBestTextFragments(cachingTokenFilter, text, true, 2);
+                if (textFragments != null) {
+                    for (TextFragment fragment : textFragments) {
+                        if (excerpt.length() > 0) {
+                            excerpt.append("...");
+                        }
+                        excerpt.append(fragment.toString());
+                    }
+                }
+            } catch (InvalidTokenOffsetsException e) {
+                // best effort: a field that cannot be highlighted must not fail the query
+                LOG.error("highlighting failed", e);
+            }
+        }
+        return excerpt.toString();
+    }
+
@Override
public NodeAggregator getNodeAggregator() {
return null;
@@ -502,7 +546,7 @@
*
* @return true if the term is related to node
*/
- public static boolean isNodePath(String fulltextTermPath){
+ public static boolean isNodePath(String fulltextTermPath) {
return fulltextTermPath.endsWith("/*");
}
@@ -564,7 +608,7 @@
}
}
- private static String getIndexName(IndexPlan plan){
+ private static String getIndexName(IndexPlan plan) {
return PathUtils.getName(getPlanResult(plan).indexPath);
}
@@ -650,7 +694,7 @@
}
if (qs.size() == 0) {
- if (reader == null){
+ if (reader == null) {
//When called in planning mode then some queries like rep:similar
//cannot create query as reader is not provided. In such case we
//just return match all queries
@@ -670,7 +714,7 @@
/**
* Perform additional wraps on the list of queries to allow, for example, the NOT CONTAINS to
* play properly when sent to lucene.
- *
+ *
* @param qs the list of queries. Cannot be null.
* @return
*/
@@ -704,7 +748,7 @@
}
if (!unwrapped) {
- bq.add(q, MUST);
+ bq.add(q, MUST);
}
}
return new LuceneRequestFacade(bq);
@@ -712,7 +756,7 @@
/**
* unwraps any NOT clauses from the provided boolean query into another boolean query.
- *
+ *
* @param input the query to be analysed for the existence of NOT clauses. Cannot be null.
* @param output the query where the unwrapped NOTs will be saved into. Cannot be null.
* @return true if there where at least one unwrapped NOT. false otherwise.
@@ -727,23 +771,23 @@
unwrapped = true;
}
}
-
+
return unwrapped;
}
-
+
private CustomScoreQuery getCustomScoreQuery(IndexPlan plan, Query subQuery) {
PlanResult planResult = getPlanResult(plan);
IndexDefinition idxDef = planResult.indexDefinition;
String providerName = idxDef.getScorerProviderName();
if (scorerProviderFactory != null && providerName != null) {
- return scorerProviderFactory.getScorerProvider(providerName)
- .createCustomScoreQuery(subQuery);
+ return scorerProviderFactory.getScorerProvider(providerName)
+ .createCustomScoreQuery(subQuery);
}
return null;
}
private static void addNonFullTextConstraints(List qs,
- IndexPlan plan, IndexReader reader) {
+ IndexPlan plan, IndexReader reader) {
Filter filter = plan.getFilter();
PlanResult planResult = getPlanResult(plan);
IndexDefinition defn = planResult.indexDefinition;
@@ -753,37 +797,37 @@
String path = getPathRestriction(plan);
switch (filter.getPathRestriction()) {
- case ALL_CHILDREN:
- if (defn.evaluatePathRestrictions()) {
- if ("/".equals(path)) {
- break;
+ case ALL_CHILDREN:
+ if (defn.evaluatePathRestrictions()) {
+ if ("/".equals(path)) {
+ break;
+ }
+ qs.add(new TermQuery(newAncestorTerm(path)));
}
- qs.add(new TermQuery(newAncestorTerm(path)));
- }
- break;
- case DIRECT_CHILDREN:
- if (defn.evaluatePathRestrictions()) {
- BooleanQuery bq = new BooleanQuery();
- bq.add(new BooleanClause(new TermQuery(newAncestorTerm(path)), BooleanClause.Occur.MUST));
- bq.add(new BooleanClause(newDepthQuery(path), BooleanClause.Occur.MUST));
- qs.add(bq);
- }
- break;
- case EXACT:
- qs.add(new TermQuery(newPathTerm(path)));
- break;
- case PARENT:
- if (denotesRoot(path)) {
- // there's no parent of the root node
- // we add a path that can not possibly occur because there
- // is no way to say "match no documents" in Lucene
- qs.add(new TermQuery(new Term(FieldNames.PATH, "///")));
- } else {
- qs.add(new TermQuery(newPathTerm(getParentPath(path))));
- }
- break;
- case NO_RESTRICTION:
- break;
+ break;
+ case DIRECT_CHILDREN:
+ if (defn.evaluatePathRestrictions()) {
+ BooleanQuery bq = new BooleanQuery();
+ bq.add(new BooleanClause(new TermQuery(newAncestorTerm(path)), BooleanClause.Occur.MUST));
+ bq.add(new BooleanClause(newDepthQuery(path), BooleanClause.Occur.MUST));
+ qs.add(bq);
+ }
+ break;
+ case EXACT:
+ qs.add(new TermQuery(newPathTerm(path)));
+ break;
+ case PARENT:
+ if (denotesRoot(path)) {
+ // there's no parent of the root node
+ // we add a path that can not possibly occur because there
+ // is no way to say "match no documents" in Lucene
+ qs.add(new TermQuery(new Term(FieldNames.PATH, "///")));
+ } else {
+ qs.add(new TermQuery(newPathTerm(getParentPath(path))));
+ }
+ break;
+ case NO_RESTRICTION:
+ break;
}
for (PropertyRestriction pr : filter.getPropertyRestrictions()) {
@@ -817,7 +861,7 @@
continue;
}
}
-
+
PropertyDefinition pd = planResult.getPropDefn(pr);
if (pd == null) {
continue;
@@ -839,14 +883,14 @@
typeFromRestriction = pr.first.getType().tag();
} else if (pr.last != null && pr.last.getType() != Type.UNDEFINED) {
typeFromRestriction = pr.last.getType().tag();
- } else if (pr.list != null && !pr.list.isEmpty()){
+ } else if (pr.list != null && !pr.list.isEmpty()) {
typeFromRestriction = pr.list.get(0).getType().tag();
}
}
return getPropertyType(defn, pr.propertyName, typeFromRestriction);
}
- private static int getPropertyType(PropertyDefinition defn, String name, int defaultVal){
+ private static int getPropertyType(PropertyDefinition defn, String name, int defaultVal) {
if (defn.isTypeDefined()) {
return defn.getType();
}
@@ -887,13 +931,13 @@
PropertyDefinition defn) {
int propType = determinePropertyType(defn, pr);
- if (pr.isNullRestriction()){
+ if (pr.isNullRestriction()) {
return new TermQuery(new Term(FieldNames.NULL_PROPS, defn.name));
}
//If notNullCheckEnabled explicitly enabled use the simple TermQuery
//otherwise later fallback to range query
- if (pr.isNotNullRestriction() && defn.notNullCheckEnabled){
+ if (pr.isNotNullRestriction() && defn.notNullCheckEnabled) {
return new TermQuery(new Term(FieldNames.NOT_NULL_PROPS, defn.name));
}
@@ -1019,12 +1063,12 @@
}
}
}
- throw new IllegalStateException("PropertyRestriction not handled " + pr + " for index " + defn );
+ throw new IllegalStateException("PropertyRestriction not handled " + pr + " for index " + defn);
}
- static long getVersion(IndexSearcher indexSearcher){
+ static long getVersion(IndexSearcher indexSearcher) {
IndexReader reader = indexSearcher.getIndexReader();
- if (reader instanceof DirectoryReader){
+ if (reader instanceof DirectoryReader) {
return ((DirectoryReader) reader).getVersion();
}
return -1;
@@ -1042,11 +1086,11 @@
return createLikeQuery(FieldNames.NODE_NAME, first);
}
- throw new IllegalStateException("For nodeName queries only EQUALS and LIKE are supported "+pr);
+ throw new IllegalStateException("For nodeName queries only EQUALS and LIKE are supported " + pr);
}
private static void addReferenceConstraint(String uuid, List qs,
- IndexReader reader) {
+ IndexReader reader) {
if (reader == null) {
// getPlan call
qs.add(new TermQuery(new Term("*", uuid)));
@@ -1120,7 +1164,7 @@
if (x instanceof BooleanQuery) {
BooleanQuery bq = (BooleanQuery) x;
if ((bq.getClauses().length == 1) &&
- (bq.getClauses()[0].getOccur() == BooleanClause.Occur.MUST_NOT)) {
+ (bq.getClauses()[0].getOccur() == BooleanClause.Occur.MUST_NOT)) {
hasMustNot = true;
q.add(bq.getClauses()[0]);
}
@@ -1141,7 +1185,7 @@
private boolean visitTerm(String propertyName, String text, String boost, boolean not) {
String p = getLuceneFieldName(propertyName, pr);
- Query q = tokenToQuery(text, p, pr.indexingRule, analyzer);
+ Query q = tokenToQuery(text, p, pr.indexingRule, analyzer);
if (q == null) {
return false;
}
@@ -1162,12 +1206,12 @@
}
static String getLuceneFieldName(@Nullable String p, PlanResult pr) {
- if (p == null){
+ if (p == null) {
return FieldNames.FULLTEXT;
}
- if (isNodePath(p)){
- if (pr.isPathTransformed()){
+ if (isNodePath(p)) {
+ if (pr.isPathTransformed()) {
p = PathUtils.getName(p);
} else {
//Get rid of /* as aggregated fulltext field name is the
@@ -1175,13 +1219,13 @@
p = FieldNames.createFulltextFieldName(PathUtils.getParentPath(p));
}
} else {
- if (pr.isPathTransformed()){
+ if (pr.isPathTransformed()) {
p = PathUtils.getName(p);
}
p = FieldNames.createAnalyzedFieldName(p);
}
- if ("*".equals(p)){
+ if ("*".equals(p)) {
p = FieldNames.FULLTEXT;
}
return p;
@@ -1226,7 +1270,7 @@
/**
* Following logic is taken from org.apache.jackrabbit.core.query.lucene.JackrabbitQueryParser#parse(java.lang.String)
*/
- private static String rewriteQueryText(String textsearch){
+ private static String rewriteQueryText(String textsearch) {
// replace escaped ' with just '
StringBuilder rewritten = new StringBuilder();
// the default lucene query parser recognizes 'AND' and 'NOT' as
@@ -1281,8 +1325,10 @@
final double score;
final Iterable suggestWords;
final boolean isVirutal;
+ final String excerpt;
- LuceneResultRow(String path, double score) {
+ LuceneResultRow(String path, double score, String excerpt) {
+ this.excerpt = excerpt;
this.isVirutal = false;
this.path = path;
this.score = score;
@@ -1294,6 +1340,7 @@
this.path = "/";
this.score = 1.0d;
this.suggestWords = suggestWords;
+ this.excerpt = null;
}
@Override
@@ -1379,6 +1426,9 @@
if (QueryImpl.REP_SPELLCHECK.equals(columnName) || QueryImpl.REP_SUGGEST.equals(columnName)) {
return PropertyValues.newString(Iterables.toString(currentRow.suggestWords));
}
+ if (QueryImpl.REP_EXCERPT.equals(columnName)) {
+ return PropertyValues.newString(currentRow.excerpt);
+ }
return pathRow.getValue(columnName);
}
Index: oak-solr-core/pom.xml
===================================================================
--- oak-solr-core/pom.xml (revision 1712931)
+++ oak-solr-core/pom.xml (working copy)
@@ -58,6 +58,8 @@
org.apache.jackrabbit.core.query.ExcerptTest#testPunctuationStartsFragment
org.apache.jackrabbit.core.query.ExcerptTest#testPunctuationStartsFragmentEndsWithDots
org.apache.jackrabbit.core.query.ExcerptTest#testPreferPhrase
+ org.apache.jackrabbit.core.query.ExcerptTest#testQuotedPhrase
+ org.apache.jackrabbit.core.query.ExcerptTest#testEncodeIllegalCharsHighlights
Index: oak-solr-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/solr/query/FilterQueryParser.java
===================================================================
--- oak-solr-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/solr/query/FilterQueryParser.java (revision 1712931)
+++ oak-solr-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/solr/query/FilterQueryParser.java (working copy)
@@ -20,6 +20,7 @@
import java.util.List;
import org.apache.jackrabbit.oak.plugins.index.solr.configuration.OakSolrConfiguration;
+import org.apache.jackrabbit.oak.query.QueryImpl;
import org.apache.jackrabbit.oak.query.fulltext.FullTextAnd;
import org.apache.jackrabbit.oak.query.fulltext.FullTextContains;
import org.apache.jackrabbit.oak.query.fulltext.FullTextExpression;
@@ -154,6 +155,22 @@
if (SolrQueryIndex.isIgnoredProperty(pr.propertyName, configuration)) {
continue;
}
+// } else if (QueryImpl.REP_EXCERPT.equals(pr.propertyName)) {
+// solrQuery.set("hl.fl", "*");
+// if (!solrQuery.getHighlight()) {
+// solrQuery.setHighlight(true);
+// solrQuery.setHighlightSimplePre("");
+// solrQuery.setHighlightSimplePost("");
+// }
+// } else if (pr.propertyName.startsWith(QueryImpl.REP_EXCERPT + "(")) {
+// String propertyName = pr.propertyName.substring(pr.propertyName.lastIndexOf('('), pr.propertyName.length() - 1);
+// solrQuery.set("hl.fl", propertyName);
+// if (!solrQuery.getHighlight()) {
+// solrQuery.setHighlight(true);
+// solrQuery.setHighlightSimplePre("");
+// solrQuery.setHighlightSimplePost("");
+// }
+// }
String first = null;
if (pr.first != null) {
@@ -217,6 +234,19 @@
solrQuery.addFilterQuery(ptQueryBuilder.toString());
}
+ if (filter.getQueryStatement() != null && filter.getQueryStatement().contains(QueryImpl.REP_EXCERPT)) {
+ if (!solrQuery.getHighlight()) {
+ // enable highlighting
+ solrQuery.setHighlight(true);
+ // default highlighting parameters: all fields, html encoder, contiguous fragments merged
+ solrQuery.set("hl.fl", "*");
+ solrQuery.set("hl.encoder", "html");
+ solrQuery.set("hl.mergeContiguous", true);
+ solrQuery.setHighlightSimplePre("");
+ solrQuery.setHighlightSimplePost("");
+ }
+ }
+
if (configuration.useForPathRestrictions()) {
Filter.PathRestriction pathRestriction = filter.getPathRestriction();
if (pathRestriction != null) {
Index: oak-solr-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/solr/query/SolrQueryIndex.java
===================================================================
--- oak-solr-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/solr/query/SolrQueryIndex.java (revision 1712931)
+++ oak-solr-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/solr/query/SolrQueryIndex.java (working copy)
@@ -16,6 +16,7 @@
*/
package org.apache.jackrabbit.oak.plugins.index.solr.query;
+import javax.annotation.CheckForNull;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
@@ -25,7 +26,6 @@
import java.util.List;
import java.util.Map;
import java.util.Set;
-import javax.annotation.CheckForNull;
import com.google.common.collect.AbstractIterator;
import com.google.common.collect.Iterables;
@@ -62,9 +62,7 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import static org.apache.jackrabbit.oak.commons.PathUtils.getAncestorPath;
-import static org.apache.jackrabbit.oak.commons.PathUtils.getDepth;
-import static org.apache.jackrabbit.oak.commons.PathUtils.getParentPath;
+import static org.apache.jackrabbit.oak.commons.PathUtils.*;
/**
* A Solr based {@link QueryIndex}
@@ -137,11 +135,11 @@
}
// property restriction OR native language property restriction defined AND property restriction handled
- if (filter.getPropertyRestrictions() != null
+ if (filter.getPropertyRestrictions() != null
&& filter.getPropertyRestrictions().size() > 0
- && (filter.getPropertyRestriction(NATIVE_SOLR_QUERY) != null
+ && (filter.getPropertyRestriction(NATIVE_SOLR_QUERY) != null
|| filter.getPropertyRestriction(NATIVE_LUCENE_QUERY) != null
- || configuration.useForPropertyRestrictions())
+ || configuration.useForPropertyRestrictions())
&& !hasIgnoredProperties(filter.getPropertyRestrictions(), configuration)) {
match++;
}
@@ -163,7 +161,6 @@
}
-
return match;
}
@@ -324,7 +321,21 @@
onRetrievedDocs(filter, docs);
+ Map>> highlighting = queryResponse.getHighlighting();
for (SolrDocument doc : docs) {
+ // copy any highlighting snippets returned for this document's path into the rep:excerpt field
+ if (highlighting != null) {
+ Object pathObject = doc.getFieldValue(configuration.getPathField());
+ if (pathObject != null && highlighting.get(String.valueOf(pathObject)) != null) {
+ Map> value = highlighting.get(String.valueOf(pathObject));
+ for (Map.Entry> entry : value.entrySet()) {
+ // all highlighted values end up in 'rep:excerpt', regardless of field match
+ for (String v : entry.getValue()) {
+ doc.addField(QueryImpl.REP_EXCERPT, v);
+ }
+ }
+ }
+ }
SolrResultRow row = convertToRow(doc);
if (row != null) {
queue.add(row);
@@ -441,7 +452,7 @@
(!configuration.useForPropertyRestrictions() // Solr index not used for properties
|| (configuration.getUsedProperties().size() > 0 && !configuration.getUsedProperties().contains(propertyName)) // not explicitly contained in the used properties
|| propertyName.contains("/") // no child-level property restrictions
- || "rep:excerpt".equals(propertyName) // rep:excerpt is handled by the query engine
+ || "rep:excerpt".equals(propertyName) // rep:excerpt is not handled at the property level
|| QueryConstants.RESTRICTION_LOCAL_NAME.equals(propertyName)
|| configuration.getIgnoredProperties().contains(propertyName));
}
@@ -571,7 +582,23 @@
return PropertyValues.newDouble(currentRow.score);
}
Collection