Index: oak-core/src/main/java/org/apache/jackrabbit/oak/query/ResultRowImpl.java =================================================================== --- oak-core/src/main/java/org/apache/jackrabbit/oak/query/ResultRowImpl.java (revision 1712931) +++ oak-core/src/main/java/org/apache/jackrabbit/oak/query/ResultRowImpl.java (working copy) @@ -35,18 +35,18 @@ private final Query query; private final Tree[] trees; - + /** * The column values. */ private final PropertyValue[] values; - + /** * Whether the value at the given index is used for comparing rows (used * within hashCode and equals). If null, all columns are distinct. */ private final boolean[] distinctValues; - + /** * The values used for ordering. */ @@ -59,7 +59,7 @@ this.distinctValues = distinctValues; this.orderValues = orderValues; } - + PropertyValue[] getOrderValues() { return orderValues; } @@ -107,15 +107,22 @@ } // OAK-318: // somebody might call rep:excerpt(text) - // even thought the query doesn't contain that column + // even though the query doesn't contain that column if (columnName.startsWith(QueryImpl.REP_EXCERPT)) { - // missing excerpt, generate a default value - String ex = SimpleExcerptProvider.getExcerpt(getPath(), columnName, - query, true); - if (ex != null) { - return PropertyValues.newString(ex); + int columnIndex = query.getColumnIndex(QueryImpl.REP_EXCERPT); + // the index guard must cover both column spellings: values[columnIndex] may only be read when columnIndex >= 0 + if (columnIndex >= 0 && (QueryImpl.REP_EXCERPT.equals(columnName) || SimpleExcerptProvider.REP_EXCERPT_FN.equals(columnName))) { + return SimpleExcerptProvider.getExcerpt(values[columnIndex]); + // TODO : make it possible to extract property level excerpts, e.g. 
rep:excerpt(text) from indexes + } else { + // missing excerpt, generate a default value + String ex = SimpleExcerptProvider.getExcerpt(getPath(), columnName, + query, true); + if (ex != null) { + return PropertyValues.newString(ex); + } + return PropertyValues.newString(getPath()); } - return PropertyValues.newString(getPath()); } throw new IllegalArgumentException("Column not found: " + columnName); } @@ -146,8 +153,8 @@ } return buff.toString(); } - + @Override public int hashCode() { int result = 1; @@ -155,7 +162,7 @@ result = 31 * result + hashCodeOfValues(); return result; } - + private int hashCodeOfValues() { int result = 1; for (int i = 0; i < values.length; i++) { @@ -249,4 +256,4 @@ } -} +} \ No newline at end of file Index: oak-core/src/main/java/org/apache/jackrabbit/oak/query/ast/NotImpl.java =================================================================== --- oak-core/src/main/java/org/apache/jackrabbit/oak/query/ast/NotImpl.java (revision 1712931) +++ oak-core/src/main/java/org/apache/jackrabbit/oak/query/ast/NotImpl.java (working copy) @@ -18,7 +18,6 @@ */ package org.apache.jackrabbit.oak.query.ast; -import static com.google.common.collect.Lists.newArrayList; import static org.apache.jackrabbit.oak.query.ast.AstElementFactory.copyElementAndCheckReference; import java.util.Collections; Index: oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/SimpleExcerptProvider.java =================================================================== --- oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/SimpleExcerptProvider.java (revision 1712931) +++ oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/SimpleExcerptProvider.java (working copy) @@ -16,13 +16,16 @@ */ package org.apache.jackrabbit.oak.query.fulltext; -import static org.apache.jackrabbit.util.Text.encodeIllegalXMLCharacters; - import java.util.BitSet; import java.util.HashSet; +import java.util.List; +import java.util.Map; import java.util.Set; +import 
com.google.common.base.Splitter; +import com.google.common.collect.ImmutableSet; import org.apache.jackrabbit.oak.api.PropertyState; +import org.apache.jackrabbit.oak.api.PropertyValue; import org.apache.jackrabbit.oak.api.Tree; import org.apache.jackrabbit.oak.api.Type; import org.apache.jackrabbit.oak.commons.PathUtils; @@ -33,20 +36,24 @@ import org.apache.jackrabbit.oak.query.ast.FullTextSearchImpl; import org.apache.jackrabbit.oak.query.ast.LiteralImpl; import org.apache.jackrabbit.oak.query.ast.OrImpl; +import org.apache.jackrabbit.oak.spi.query.PropertyValues; -import com.google.common.collect.ImmutableSet; +import static com.google.common.collect.Maps.newHashMap; +import static org.apache.jackrabbit.util.Text.encodeIllegalXMLCharacters; /** * This class can extract excerpts from node. */ public class SimpleExcerptProvider { - private static final String REP_EXCERPT_FN = "rep:excerpt(.)"; + public static final String REP_EXCERPT_FN = "rep:excerpt(.)"; + public static final String EXCERPT_END = ""; + public static final String EXCERPT_BEGIN = "
"; private static int maxFragmentSize = 150; public static String getExcerpt(String path, String columnName, - Query query, boolean highlight) { + Query query, boolean highlight) { if (path == null) { return null; } @@ -72,7 +79,7 @@ for (PropertyState p : t.getProperties()) { if (p.getType().tag() == Type.STRING.tag() && (columnName == null || columnName.equalsIgnoreCase(p - .getName()))) { + .getName()))) { text.append(separator); separator = " "; for (String v : p.getValue(Type.STRINGS)) { @@ -82,8 +89,7 @@ } Set searchToken = extractFulltext(query); if (highlight && searchToken != null) { - String h = highlight(text, searchToken); - return h; + return highlight(text, searchToken); } return noHighlight(text); } @@ -140,32 +146,32 @@ Set out = new HashSet(); StringBuilder token = new StringBuilder(); boolean quote = false; - for (int i = 0; i < in.length();) { + for (int i = 0; i < in.length(); ) { final int c = in.codePointAt(i); int length = Character.charCount(c); switch (c) { - case ' ': - if (quote) { - token.append(' '); - } else if (token.length() > 0) { - out.add(token.toString()); - token = new StringBuilder(); - } - break; - case '"': - case '\'': - if (quote) { - quote = false; - if (token.length() > 0) { + case ' ': + if (quote) { + token.append(' '); + } else if (token.length() > 0) { out.add(token.toString()); token = new StringBuilder(); } - } else { - quote = true; - } - break; - default: - token.append(new String(Character.toChars(c))); + break; + case '"': + case '\'': + if (quote) { + quote = false; + if (token.length() > 0) { + out.add(token.toString()); + token = new StringBuilder(); + } + } else { + quote = true; + } + break; + default: + token.append(new String(Character.toChars(c))); } i += length; } @@ -198,7 +204,7 @@ for (String token : tokens) { highlight(escaped, highlight, token); } - StringBuilder excerpt = new StringBuilder("
"); + StringBuilder excerpt = new StringBuilder(EXCERPT_BEGIN); boolean strong = false; for (int i = 0; i < escaped.length(); i++) { if (highlight.get(i) && !strong) { @@ -213,10 +219,10 @@ if (strong) { excerpt.append(""); } - excerpt.append("
"); + excerpt.append(EXCERPT_END); return excerpt.toString(); } - + private static void highlight(String text, BitSet highlightBits, String token) { boolean isLike = false; if (token.endsWith("*")) { @@ -247,5 +253,55 @@ } } } - + + public static PropertyValue getExcerpt(PropertyValue value) { + Splitter listSplitter = Splitter.on(',').trimResults().omitEmptyStrings(); + StringBuilder excerpt = new StringBuilder(EXCERPT_BEGIN); + for (String v : listSplitter.splitToList(value.toString())) { + excerpt.append(v); + } + excerpt.append(EXCERPT_END); + return PropertyValues.newString(excerpt.toString()); + } + + public static PropertyValue getExcerpt(String columnName, PropertyValue value) { + StringBuilder excerpt = new StringBuilder(EXCERPT_BEGIN); + String property = extractExcerptProperty(columnName); + + Splitter listSplitter = Splitter.on(',').trimResults().omitEmptyStrings(); + List values = listSplitter.splitToList(value.toString()); + for (String rev : values) { + String substring = rev.substring(1, rev.length() - 1); + Map m1 = listSplitter.withKeyValueSeparator(':') + .split(substring); + Map> m2 = newHashMap(); + for (Map.Entry entry : m1.entrySet()) { + String k = entry.getKey(); + String entryValue = entry.getValue(); + List v = listSplitter.splitToList(entryValue); + m2.put(k, v); + } + if (m2.size() > 0) { + if (property != null) { + List propertyValues = m2.get(property); + if (addValues(excerpt, propertyValues)) break; + } else { + List propertyValues = m2.values().iterator().next(); + if (addValues(excerpt, propertyValues)) break; + } + } + } + excerpt.append(EXCERPT_END); + return PropertyValues.newString(excerpt.toString()); + } + + private static boolean addValues(StringBuilder excerpt, List propertyValues) { + if (propertyValues != null && propertyValues.size() > 0) { + for (String pv : propertyValues) { + excerpt.append(pv); + } + return true; + } + return false; + } } Index: oak-lucene/pom.xml 
=================================================================== --- oak-lucene/pom.xml (revision 1712931) +++ oak-lucene/pom.xml (working copy) @@ -42,6 +42,9 @@ org.apache.jackrabbit.core.query.ExcerptTest#testPunctuationStartsFragment org.apache.jackrabbit.core.query.ExcerptTest#testPunctuationStartsFragmentEndsWithDots org.apache.jackrabbit.core.query.ExcerptTest#testPreferPhrase + org.apache.jackrabbit.core.query.ExcerptTest#testQuotedPhrase + org.apache.jackrabbit.core.query.ExcerptTest#testHighlightJa + org.apache.jackrabbit.core.query.ExcerptTest#testEncodeIllegalCharsHighlights org.apache.jackrabbit.core.query.QueryResultTest#testSkip org.apache.jackrabbit.core.query.DerefTest#testDeref org.apache.jackrabbit.core.query.DerefTest#testDerefInPredicate @@ -206,6 +209,11 @@ ${lucene.version} provided + + org.apache.lucene + lucene-highlighter + ${lucene.version} + Index: oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java =================================================================== --- oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java (revision 1712931) +++ oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java (working copy) @@ -73,12 +73,14 @@ import org.apache.jackrabbit.oak.spi.query.QueryIndex.AdvanceFulltextQueryIndex; import org.apache.jackrabbit.oak.spi.state.NodeState; import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.CachingTokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.document.Document; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.MultiFields; import org.apache.lucene.index.StoredFieldVisitor; import 
org.apache.lucene.index.Term; @@ -98,6 +100,12 @@ import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.TotalHitCountCollector; import org.apache.lucene.search.WildcardQuery; +import org.apache.lucene.search.highlight.Highlighter; +import org.apache.lucene.search.highlight.InvalidTokenOffsetsException; +import org.apache.lucene.search.highlight.QueryScorer; +import org.apache.lucene.search.highlight.SimpleHTMLEncoder; +import org.apache.lucene.search.highlight.SimpleHTMLFormatter; +import org.apache.lucene.search.highlight.TextFragment; import org.apache.lucene.search.spell.SuggestWord; import org.apache.lucene.search.suggest.Lookup; import org.apache.lucene.util.Version; @@ -172,6 +180,9 @@ private final NodeAggregator aggregator; + private final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter("", ""), + new SimpleHTMLEncoder(), null); + public LuceneIndex(IndexTracker tracker, NodeAggregator aggregator) { this.tracker = tracker; this.aggregator = aggregator; @@ -298,7 +309,7 @@ return endOfData(); } - private LuceneResultRow convertToRow(ScoreDoc doc, IndexSearcher searcher) throws IOException { + private LuceneResultRow convertToRow(ScoreDoc doc, IndexSearcher searcher, String excerpt) throws IOException { IndexReader reader = searcher.getIndexReader(); PathStoredFieldVisitor visitor = new PathStoredFieldVisitor(); reader.document(doc.doc, visitor); @@ -323,7 +334,7 @@ seenPaths.add(path); } - return new LuceneResultRow(path, doc.score); + return new LuceneResultRow(path, doc.score, excerpt); } return null; } @@ -363,8 +374,14 @@ LOG.debug("... 
took {} ms", time); nextBatchSize = (int) Math.min(nextBatchSize * 2L, 100000); + boolean addExcerpt = filter.getQueryStatement() != null && filter.getQueryStatement().contains(QueryImpl.REP_EXCERPT); for (ScoreDoc doc : docs.scoreDocs) { - LuceneResultRow row = convertToRow(doc, searcher); + String excerpt = null; + if (addExcerpt) { + excerpt = getExcerpt(indexNode, searcher, query, doc); + } + + LuceneResultRow row = convertToRow(doc, searcher, excerpt); if (row != null) { queue.add(row); } @@ -476,6 +493,35 @@ return new LucenePathCursor(itr, settings, sizeEstimator); } + private String getExcerpt(IndexNode indexNode, IndexSearcher searcher, Query query, ScoreDoc doc) throws IOException { + StringBuilder excerpt = new StringBuilder(); + QueryScorer scorer = new QueryScorer(query); + scorer.setExpandMultiTermQuery(true); + highlighter.setFragmentScorer(scorer); + // joins the highlighted fragments of every stored field with "..."; fields without a string value cannot be tokenized and are skipped + for (IndexableField field : searcher.getIndexReader().document(doc.doc).getFields()) + if (!FieldNames.SUGGEST.equals(field.name()) && field.stringValue() != null) { + try { + Analyzer analyzer = indexNode.getDefinition().getAnalyzer(); + TokenStream tokenStream = analyzer.tokenStream(field.name(), field.stringValue()); + tokenStream.reset(); + CachingTokenFilter cachingTokenFilter = new CachingTokenFilter(tokenStream); + TextFragment[] textFragments = highlighter.getBestTextFragments(cachingTokenFilter, field.stringValue(), true, 2); + if (textFragments != null && textFragments.length > 0) { + for (TextFragment fragment : textFragments) { + if (excerpt.length() > 0) { + excerpt.append("..."); + } + excerpt.append(fragment.toString()); + } + } + } catch (InvalidTokenOffsetsException e) { + LOG.error("highlighting failed", e); + } + } + return excerpt.toString(); + } + protected static IndexPlan.Builder planBuilder(Filter filter){ return new IndexPlan.Builder() .setCostPerExecution(0) // we're local. 
Low-cost @@ -1039,11 +1085,13 @@ final double score; final Iterable suggestWords; final boolean isVirtual; + final String excerpt; - LuceneResultRow(String path, double score) { + LuceneResultRow(String path, double score, String excerpt) { this.isVirtual = false; this.path = path; this.score = score; + this.excerpt = excerpt; this.suggestWords = Collections.emptySet(); } @@ -1052,6 +1100,7 @@ this.path = "/"; this.score = 1.0d; this.suggestWords = suggestWords; + this.excerpt = null; } @Override @@ -1130,6 +1179,9 @@ if (QueryImpl.REP_SPELLCHECK.equals(columnName) || QueryImpl.REP_SUGGEST.equals(columnName)) { return PropertyValues.newString(Iterables.toString(currentRow.suggestWords)); } + if (QueryImpl.REP_EXCERPT.equals(columnName)) { + return PropertyValues.newString(currentRow.excerpt); + } return pathRow.getValue(columnName); } Index: oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndex.java =================================================================== --- oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndex.java (revision 1712931) +++ oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndex.java (working copy) @@ -18,6 +18,10 @@ */ package org.apache.jackrabbit.oak.plugins.index.lucene; +import javax.annotation.CheckForNull; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import javax.jcr.PropertyType; import java.io.IOException; import java.util.ArrayList; import java.util.Collection; @@ -28,17 +32,11 @@ import java.util.Set; import java.util.concurrent.atomic.AtomicReference; -import javax.annotation.CheckForNull; -import javax.annotation.Nonnull; -import javax.annotation.Nullable; -import javax.jcr.PropertyType; - import com.google.common.collect.AbstractIterator; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; import com.google.common.collect.Queues; import 
com.google.common.collect.Sets; - import org.apache.jackrabbit.oak.api.PropertyValue; import org.apache.jackrabbit.oak.api.Result.SizePrecision; import org.apache.jackrabbit.oak.api.Type; @@ -70,10 +68,13 @@ import org.apache.jackrabbit.oak.spi.state.NodeState; import org.apache.jackrabbit.oak.util.PerfLogger; import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.CachingTokenFilter; +import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.document.Document; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.MultiFields; import org.apache.lucene.index.StoredFieldVisitor; import org.apache.lucene.index.Term; @@ -98,6 +99,12 @@ import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.TotalHitCountCollector; import org.apache.lucene.search.WildcardQuery; +import org.apache.lucene.search.highlight.Highlighter; +import org.apache.lucene.search.highlight.InvalidTokenOffsetsException; +import org.apache.lucene.search.highlight.QueryScorer; +import org.apache.lucene.search.highlight.SimpleHTMLEncoder; +import org.apache.lucene.search.highlight.SimpleHTMLFormatter; +import org.apache.lucene.search.highlight.TextFragment; import org.apache.lucene.search.spell.SuggestWord; import org.apache.lucene.search.suggest.Lookup; import org.apache.lucene.util.Version; @@ -114,6 +121,7 @@ import static org.apache.jackrabbit.oak.commons.PathUtils.denotesRoot; import static org.apache.jackrabbit.oak.commons.PathUtils.getParentPath; import static org.apache.jackrabbit.oak.plugins.index.lucene.FieldNames.PATH; +import static org.apache.jackrabbit.oak.plugins.index.lucene.FieldNames.SUGGEST; import static org.apache.jackrabbit.oak.plugins.index.lucene.IndexDefinition.NATIVE_SORT_ORDER; import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.VERSION; 
import static org.apache.jackrabbit.oak.plugins.index.lucene.TermFactory.newAncestorTerm; @@ -121,9 +129,7 @@ import static org.apache.jackrabbit.oak.query.QueryImpl.JCR_PATH; import static org.apache.jackrabbit.oak.spi.query.QueryIndex.AdvancedQueryIndex; import static org.apache.jackrabbit.oak.spi.query.QueryIndex.NativeQueryIndex; -import static org.apache.lucene.search.BooleanClause.Occur.MUST; -import static org.apache.lucene.search.BooleanClause.Occur.MUST_NOT; -import static org.apache.lucene.search.BooleanClause.Occur.SHOULD; +import static org.apache.lucene.search.BooleanClause.Occur.*; /** * Provides a QueryIndex that does lookups against a Lucene-based index @@ -166,7 +172,7 @@ */ public class LucenePropertyIndex implements AdvancedQueryIndex, QueryIndex, NativeQueryIndex, AdvanceFulltextQueryIndex { - + private static double MIN_COST = 2.1; private static final Logger LOG = LoggerFactory @@ -185,6 +191,9 @@ private final ScorerProviderFactory scorerProviderFactory; + private final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter("", ""), + new SimpleHTMLEncoder(), null); + public LucenePropertyIndex(IndexTracker tracker) { this.tracker = tracker; this.scorerProviderFactory = ScorerProviderFactory.DEFAULT; @@ -253,7 +262,7 @@ .append(path) .append(") "); sb.append(getLuceneRequest(plan, null)); - if(plan.getSortOrder() != null && !plan.getSortOrder().isEmpty()){ + if (plan.getSortOrder() != null && !plan.getSortOrder().isEmpty()) { sb.append(" ordering:").append(plan.getSortOrder()); } if (ft != null) { @@ -292,7 +301,7 @@ return endOfData(); } - private LuceneResultRow convertToRow(ScoreDoc doc, IndexSearcher searcher) throws IOException { + private LuceneResultRow convertToRow(ScoreDoc doc, IndexSearcher searcher, String excerpt) throws IOException { IndexReader reader = searcher.getIndexReader(); //TODO Look into usage of field cache for retrieving the path //instead of reading via reader if no of docs in index are limited @@ -307,13 
+316,13 @@ String originalPath = path; path = pr.transformPath(path); - if (path == null){ + if (path == null) { LOG.trace("Ignoring path {} : Transformation returned null", originalPath); return null; } // avoid duplicate entries - if (seenPaths.contains(path)){ + if (seenPaths.contains(path)) { LOG.trace("Ignoring path {} : Duplicate post transformation", originalPath); return null; } @@ -321,7 +330,7 @@ } LOG.trace("Matched path {}", path); - return new LuceneResultRow(path, doc.score); + return new LuceneResultRow(path, doc.score, excerpt); } return null; } @@ -338,7 +347,7 @@ ScoreDoc lastDocToRecord = null; - IndexNode indexNode = acquireIndexNode(plan); + final IndexNode indexNode = acquireIndexNode(plan); checkState(indexNode != null); try { IndexSearcher searcher = indexNode.getSearcher(); @@ -375,8 +384,14 @@ PERF_LOGGER.end(start, -1, "{} ...", docs.scoreDocs.length); nextBatchSize = (int) Math.min(nextBatchSize * 2L, 100000); + boolean addExcerpt = filter.getQueryStatement() != null && filter.getQueryStatement().contains(QueryImpl.REP_EXCERPT); for (ScoreDoc doc : docs.scoreDocs) { - LuceneResultRow row = convertToRow(doc, searcher); + String excerpt = null; + if (addExcerpt) { + excerpt = getExcerpt(indexNode, searcher, query, doc); + } + + LuceneResultRow row = convertToRow(doc, searcher, excerpt); if (row != null) { queue.add(row); } @@ -454,7 +469,7 @@ private void checkForIndexVersionChange(IndexSearcher searcher) { long currentVersion = getVersion(searcher); - if (currentVersion != lastSearchIndexerVersion && lastDoc != null){ + if (currentVersion != lastSearchIndexerVersion && lastDoc != null) { lastDoc = null; LOG.debug("Change in index version detected {} => {}. 
Query would be performed without " + "offset", currentVersion, lastSearchIndexerVersion); @@ -474,7 +489,7 @@ Query query = (Query) luceneRequestFacade.getLuceneRequest(); TotalHitCountCollector collector = new TotalHitCountCollector(); searcher.search(query, collector); - int totalHits = collector.getTotalHits(); + int totalHits = collector.getTotalHits(); LOG.debug("Estimated size for query {} is {}", query, totalHits); return totalHits; } @@ -490,6 +505,35 @@ return new LucenePathCursor(itr, plan, settings, sizeEstimator); } + private String getExcerpt(IndexNode indexNode, IndexSearcher searcher, Query query, ScoreDoc doc) throws IOException { + StringBuilder excerpt = new StringBuilder(); + QueryScorer scorer = new QueryScorer(query); + scorer.setExpandMultiTermQuery(true); + highlighter.setFragmentScorer(scorer); + // joins the highlighted fragments of every stored field with "..."; fields without a string value cannot be tokenized and are skipped + for (IndexableField field : searcher.getIndexReader().document(doc.doc).getFields()) + if (!SUGGEST.equals(field.name()) && field.stringValue() != null) { + try { + Analyzer analyzer = indexNode.getDefinition().getAnalyzer(); + TokenStream tokenStream = analyzer.tokenStream(field.name(), field.stringValue()); + tokenStream.reset(); + CachingTokenFilter cachingTokenFilter = new CachingTokenFilter(tokenStream); + TextFragment[] textFragments = highlighter.getBestTextFragments(cachingTokenFilter, field.stringValue(), true, 2); + if (textFragments != null && textFragments.length > 0) { + for (TextFragment fragment : textFragments) { + if (excerpt.length() > 0) { + excerpt.append("..."); + } + excerpt.append(fragment.toString()); + } + } + } catch (InvalidTokenOffsetsException e) { + LOG.error("highlighting failed", e); + } + } + return excerpt.toString(); + } + @Override public NodeAggregator getNodeAggregator() { return null; @@ -502,7 +546,7 @@ * * @return true if the term is related to node */ - public static boolean isNodePath(String fulltextTermPath){ + public static boolean isNodePath(String fulltextTermPath) { return fulltextTermPath.endsWith("/*"); } @@ -564,7 +608,7 @@ } } 
- private static String getIndexName(IndexPlan plan){ + private static String getIndexName(IndexPlan plan) { return PathUtils.getName(getPlanResult(plan).indexPath); } @@ -650,7 +694,7 @@ } if (qs.size() == 0) { - if (reader == null){ + if (reader == null) { //When called in planning mode then some queries like rep:similar //cannot create query as reader is not provided. In such case we //just return match all queries @@ -670,7 +714,7 @@ /** * Perform additional wraps on the list of queries to allow, for example, the NOT CONTAINS to * play properly when sent to lucene. - * + * * @param qs the list of queries. Cannot be null. * @return */ @@ -704,7 +748,7 @@ } if (!unwrapped) { - bq.add(q, MUST); + bq.add(q, MUST); } } return new LuceneRequestFacade(bq); @@ -712,7 +756,7 @@ /** * unwraps any NOT clauses from the provided boolean query into another boolean query. - * + * * @param input the query to be analysed for the existence of NOT clauses. Cannot be null. * @param output the query where the unwrapped NOTs will be saved into. Cannot be null. * @return true if there where at least one unwrapped NOT. false otherwise. 
@@ -727,23 +771,23 @@ unwrapped = true; } } - + return unwrapped; } - + private CustomScoreQuery getCustomScoreQuery(IndexPlan plan, Query subQuery) { PlanResult planResult = getPlanResult(plan); IndexDefinition idxDef = planResult.indexDefinition; String providerName = idxDef.getScorerProviderName(); if (scorerProviderFactory != null && providerName != null) { - return scorerProviderFactory.getScorerProvider(providerName) - .createCustomScoreQuery(subQuery); + return scorerProviderFactory.getScorerProvider(providerName) + .createCustomScoreQuery(subQuery); } return null; } private static void addNonFullTextConstraints(List qs, - IndexPlan plan, IndexReader reader) { + IndexPlan plan, IndexReader reader) { Filter filter = plan.getFilter(); PlanResult planResult = getPlanResult(plan); IndexDefinition defn = planResult.indexDefinition; @@ -753,37 +797,37 @@ String path = getPathRestriction(plan); switch (filter.getPathRestriction()) { - case ALL_CHILDREN: - if (defn.evaluatePathRestrictions()) { - if ("/".equals(path)) { - break; + case ALL_CHILDREN: + if (defn.evaluatePathRestrictions()) { + if ("/".equals(path)) { + break; + } + qs.add(new TermQuery(newAncestorTerm(path))); } - qs.add(new TermQuery(newAncestorTerm(path))); - } - break; - case DIRECT_CHILDREN: - if (defn.evaluatePathRestrictions()) { - BooleanQuery bq = new BooleanQuery(); - bq.add(new BooleanClause(new TermQuery(newAncestorTerm(path)), BooleanClause.Occur.MUST)); - bq.add(new BooleanClause(newDepthQuery(path), BooleanClause.Occur.MUST)); - qs.add(bq); - } - break; - case EXACT: - qs.add(new TermQuery(newPathTerm(path))); - break; - case PARENT: - if (denotesRoot(path)) { - // there's no parent of the root node - // we add a path that can not possibly occur because there - // is no way to say "match no documents" in Lucene - qs.add(new TermQuery(new Term(FieldNames.PATH, "///"))); - } else { - qs.add(new TermQuery(newPathTerm(getParentPath(path)))); - } - break; - case NO_RESTRICTION: - break; + 
break; + case DIRECT_CHILDREN: + if (defn.evaluatePathRestrictions()) { + BooleanQuery bq = new BooleanQuery(); + bq.add(new BooleanClause(new TermQuery(newAncestorTerm(path)), BooleanClause.Occur.MUST)); + bq.add(new BooleanClause(newDepthQuery(path), BooleanClause.Occur.MUST)); + qs.add(bq); + } + break; + case EXACT: + qs.add(new TermQuery(newPathTerm(path))); + break; + case PARENT: + if (denotesRoot(path)) { + // there's no parent of the root node + // we add a path that can not possibly occur because there + // is no way to say "match no documents" in Lucene + qs.add(new TermQuery(new Term(FieldNames.PATH, "///"))); + } else { + qs.add(new TermQuery(newPathTerm(getParentPath(path)))); + } + break; + case NO_RESTRICTION: + break; } for (PropertyRestriction pr : filter.getPropertyRestrictions()) { @@ -817,7 +861,7 @@ continue; } } - + PropertyDefinition pd = planResult.getPropDefn(pr); if (pd == null) { continue; @@ -839,14 +883,14 @@ typeFromRestriction = pr.first.getType().tag(); } else if (pr.last != null && pr.last.getType() != Type.UNDEFINED) { typeFromRestriction = pr.last.getType().tag(); - } else if (pr.list != null && !pr.list.isEmpty()){ + } else if (pr.list != null && !pr.list.isEmpty()) { typeFromRestriction = pr.list.get(0).getType().tag(); } } return getPropertyType(defn, pr.propertyName, typeFromRestriction); } - private static int getPropertyType(PropertyDefinition defn, String name, int defaultVal){ + private static int getPropertyType(PropertyDefinition defn, String name, int defaultVal) { if (defn.isTypeDefined()) { return defn.getType(); } @@ -887,13 +931,13 @@ PropertyDefinition defn) { int propType = determinePropertyType(defn, pr); - if (pr.isNullRestriction()){ + if (pr.isNullRestriction()) { return new TermQuery(new Term(FieldNames.NULL_PROPS, defn.name)); } //If notNullCheckEnabled explicitly enabled use the simple TermQuery //otherwise later fallback to range query - if (pr.isNotNullRestriction() && defn.notNullCheckEnabled){ + if 
(pr.isNotNullRestriction() && defn.notNullCheckEnabled) { return new TermQuery(new Term(FieldNames.NOT_NULL_PROPS, defn.name)); } @@ -1019,12 +1063,12 @@ } } } - throw new IllegalStateException("PropertyRestriction not handled " + pr + " for index " + defn ); + throw new IllegalStateException("PropertyRestriction not handled " + pr + " for index " + defn); } - static long getVersion(IndexSearcher indexSearcher){ + static long getVersion(IndexSearcher indexSearcher) { IndexReader reader = indexSearcher.getIndexReader(); - if (reader instanceof DirectoryReader){ + if (reader instanceof DirectoryReader) { return ((DirectoryReader) reader).getVersion(); } return -1; @@ -1042,11 +1086,11 @@ return createLikeQuery(FieldNames.NODE_NAME, first); } - throw new IllegalStateException("For nodeName queries only EQUALS and LIKE are supported "+pr); + throw new IllegalStateException("For nodeName queries only EQUALS and LIKE are supported " + pr); } private static void addReferenceConstraint(String uuid, List qs, - IndexReader reader) { + IndexReader reader) { if (reader == null) { // getPlan call qs.add(new TermQuery(new Term("*", uuid))); @@ -1120,7 +1164,7 @@ if (x instanceof BooleanQuery) { BooleanQuery bq = (BooleanQuery) x; if ((bq.getClauses().length == 1) && - (bq.getClauses()[0].getOccur() == BooleanClause.Occur.MUST_NOT)) { + (bq.getClauses()[0].getOccur() == BooleanClause.Occur.MUST_NOT)) { hasMustNot = true; q.add(bq.getClauses()[0]); } @@ -1141,7 +1185,7 @@ private boolean visitTerm(String propertyName, String text, String boost, boolean not) { String p = getLuceneFieldName(propertyName, pr); - Query q = tokenToQuery(text, p, pr.indexingRule, analyzer); + Query q = tokenToQuery(text, p, pr.indexingRule, analyzer); if (q == null) { return false; } @@ -1162,12 +1206,12 @@ } static String getLuceneFieldName(@Nullable String p, PlanResult pr) { - if (p == null){ + if (p == null) { return FieldNames.FULLTEXT; } - if (isNodePath(p)){ - if (pr.isPathTransformed()){ + if 
(isNodePath(p)) { + if (pr.isPathTransformed()) { p = PathUtils.getName(p); } else { //Get rid of /* as aggregated fulltext field name is the @@ -1175,13 +1219,13 @@ p = FieldNames.createFulltextFieldName(PathUtils.getParentPath(p)); } } else { - if (pr.isPathTransformed()){ + if (pr.isPathTransformed()) { p = PathUtils.getName(p); } p = FieldNames.createAnalyzedFieldName(p); } - if ("*".equals(p)){ + if ("*".equals(p)) { p = FieldNames.FULLTEXT; } return p; @@ -1226,7 +1270,7 @@ /** * Following logic is taken from org.apache.jackrabbit.core.query.lucene.JackrabbitQueryParser#parse(java.lang.String) */ - private static String rewriteQueryText(String textsearch){ + private static String rewriteQueryText(String textsearch) { // replace escaped ' with just ' StringBuilder rewritten = new StringBuilder(); // the default lucene query parser recognizes 'AND' and 'NOT' as @@ -1281,8 +1325,10 @@ final double score; final Iterable suggestWords; final boolean isVirutal; + final String excerpt; - LuceneResultRow(String path, double score) { + LuceneResultRow(String path, double score, String excerpt) { + this.excerpt = excerpt; this.isVirutal = false; this.path = path; this.score = score; @@ -1294,6 +1340,7 @@ this.path = "/"; this.score = 1.0d; this.suggestWords = suggestWords; + this.excerpt = null; } @Override @@ -1379,6 +1426,9 @@ if (QueryImpl.REP_SPELLCHECK.equals(columnName) || QueryImpl.REP_SUGGEST.equals(columnName)) { return PropertyValues.newString(Iterables.toString(currentRow.suggestWords)); } + if (QueryImpl.REP_EXCERPT.equals(columnName)) { + return PropertyValues.newString(currentRow.excerpt); + } return pathRow.getValue(columnName); } Index: oak-solr-core/pom.xml =================================================================== --- oak-solr-core/pom.xml (revision 1712931) +++ oak-solr-core/pom.xml (working copy) @@ -58,6 +58,8 @@ org.apache.jackrabbit.core.query.ExcerptTest#testPunctuationStartsFragment 
org.apache.jackrabbit.core.query.ExcerptTest#testPunctuationStartsFragmentEndsWithDots org.apache.jackrabbit.core.query.ExcerptTest#testPreferPhrase + org.apache.jackrabbit.core.query.ExcerptTest#testQuotedPhrase + org.apache.jackrabbit.core.query.ExcerptTest#testEncodeIllegalCharsHighlights Index: oak-solr-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/solr/query/FilterQueryParser.java =================================================================== --- oak-solr-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/solr/query/FilterQueryParser.java (revision 1712931) +++ oak-solr-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/solr/query/FilterQueryParser.java (working copy) @@ -20,6 +20,7 @@ import java.util.List; import org.apache.jackrabbit.oak.plugins.index.solr.configuration.OakSolrConfiguration; +import org.apache.jackrabbit.oak.query.QueryImpl; import org.apache.jackrabbit.oak.query.fulltext.FullTextAnd; import org.apache.jackrabbit.oak.query.fulltext.FullTextContains; import org.apache.jackrabbit.oak.query.fulltext.FullTextExpression; @@ -154,6 +155,22 @@ if (SolrQueryIndex.isIgnoredProperty(pr.propertyName, configuration)) { continue; } +// } else if (QueryImpl.REP_EXCERPT.equals(pr.propertyName)) { +// solrQuery.set("hl.fl", "*"); +// if (!solrQuery.getHighlight()) { +// solrQuery.setHighlight(true); +// solrQuery.setHighlightSimplePre(""); +// solrQuery.setHighlightSimplePost(""); +// } +// } else if (pr.propertyName.startsWith(QueryImpl.REP_EXCERPT + "(")) { +// String propertyName = pr.propertyName.substring(pr.propertyName.lastIndexOf('('), pr.propertyName.length() - 1); +// solrQuery.set("hl.fl", propertyName); +// if (!solrQuery.getHighlight()) { +// solrQuery.setHighlight(true); +// solrQuery.setHighlightSimplePre(""); +// solrQuery.setHighlightSimplePost(""); +// } +// } String first = null; if (pr.first != null) { @@ -217,6 +234,19 @@ solrQuery.addFilterQuery(ptQueryBuilder.toString()); } + if 
(filter.getQueryStatement() != null && filter.getQueryStatement().contains(QueryImpl.REP_EXCERPT)) { + if (!solrQuery.getHighlight()) { + // enable highlighting + solrQuery.setHighlight(true); + // defaults + solrQuery.set("hl.fl", "*"); + solrQuery.set("hl.encoder", "html"); + solrQuery.set("hl.mergeContiguous", true); + solrQuery.setHighlightSimplePre(""); + solrQuery.setHighlightSimplePost(""); + } + } + if (configuration.useForPathRestrictions()) { Filter.PathRestriction pathRestriction = filter.getPathRestriction(); if (pathRestriction != null) { Index: oak-solr-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/solr/query/SolrQueryIndex.java =================================================================== --- oak-solr-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/solr/query/SolrQueryIndex.java (revision 1712931) +++ oak-solr-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/solr/query/SolrQueryIndex.java (working copy) @@ -16,6 +16,7 @@ */ package org.apache.jackrabbit.oak.plugins.index.solr.query; +import javax.annotation.CheckForNull; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; @@ -25,7 +26,6 @@ import java.util.List; import java.util.Map; import java.util.Set; -import javax.annotation.CheckForNull; import com.google.common.collect.AbstractIterator; import com.google.common.collect.Iterables; @@ -62,9 +62,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import static org.apache.jackrabbit.oak.commons.PathUtils.getAncestorPath; -import static org.apache.jackrabbit.oak.commons.PathUtils.getDepth; -import static org.apache.jackrabbit.oak.commons.PathUtils.getParentPath; +import static org.apache.jackrabbit.oak.commons.PathUtils.*; /** * A Solr based {@link QueryIndex} @@ -137,11 +135,11 @@ } // property restriction OR native language property restriction defined AND property restriction handled - if (filter.getPropertyRestrictions() != null + if 
(filter.getPropertyRestrictions() != null && filter.getPropertyRestrictions().size() > 0 - && (filter.getPropertyRestriction(NATIVE_SOLR_QUERY) != null + && (filter.getPropertyRestriction(NATIVE_SOLR_QUERY) != null || filter.getPropertyRestriction(NATIVE_LUCENE_QUERY) != null - || configuration.useForPropertyRestrictions()) + || configuration.useForPropertyRestrictions()) && !hasIgnoredProperties(filter.getPropertyRestrictions(), configuration)) { match++; } @@ -163,7 +161,6 @@ } - return match; } @@ -324,7 +321,21 @@ onRetrievedDocs(filter, docs); + Map>> highlighting = queryResponse.getHighlighting(); for (SolrDocument doc : docs) { + // handle highlight + if (highlighting != null) { + Object pathObject = doc.getFieldValue(configuration.getPathField()); + if (pathObject != null && highlighting.get(String.valueOf(pathObject)) != null) { + Map> value = highlighting.get(String.valueOf(pathObject)); + for (Map.Entry> entry : value.entrySet()) { + // all highlighted values end up in 'rep:excerpt', regardless of field match + for (String v : entry.getValue()) { + doc.addField(QueryImpl.REP_EXCERPT, v); + } + } + } + } SolrResultRow row = convertToRow(doc); if (row != null) { queue.add(row); @@ -441,7 +452,7 @@ (!configuration.useForPropertyRestrictions() // Solr index not used for properties || (configuration.getUsedProperties().size() > 0 && !configuration.getUsedProperties().contains(propertyName)) // not explicitly contained in the used properties || propertyName.contains("/") // no child-level property restrictions - || "rep:excerpt".equals(propertyName) // rep:excerpt is handled by the query engine + || "rep:excerpt".equals(propertyName) // rep:excerpt is not handled at the property level || QueryConstants.RESTRICTION_LOCAL_NAME.equals(propertyName) || configuration.getIgnoredProperties().contains(propertyName)); } @@ -571,7 +582,23 @@ return PropertyValues.newDouble(currentRow.score); } Collection fieldValues = currentRow.doc.getFieldValues(columnName); - return 
PropertyValues.newString(Iterables.toString(fieldValues != null ? fieldValues : Collections.emptyList())); + String value; + if (fieldValues != null && fieldValues.size() > 0) { + if (fieldValues.size() > 1) { + value = Iterables.toString(fieldValues); + } else { + Object fieldValue = currentRow.doc.getFieldValue(columnName); + if (fieldValue != null) { + value = fieldValue.toString(); + } else { + value = null; + } + } + } else { + value = Iterables.toString(Collections.emptyList()); + } + + return PropertyValues.newString(value); } }; Index: oak-solr-core/src/test/java/org/apache/jackrabbit/oak/jcr/query/SpellcheckTest.java =================================================================== --- oak-solr-core/src/test/java/org/apache/jackrabbit/oak/jcr/query/SpellcheckTest.java (revision 1712931) +++ oak-solr-core/src/test/java/org/apache/jackrabbit/oak/jcr/query/SpellcheckTest.java (working copy) @@ -83,7 +83,7 @@ Query q = qm.createQuery(xpath, Query.XPATH); String result = getResult(q.execute(), "rep:spellcheck()"); assertNotNull(result); - assertEquals("[voting in ontario]", result); + assertEquals("voting in ontario", result); } static String getResult(QueryResult result, String propertyName) throws RepositoryException { Index: oak-solr-core/src/test/resources/solr/oak/conf/schema.xml =================================================================== --- oak-solr-core/src/test/resources/solr/oak/conf/schema.xml (revision 1712931) +++ oak-solr-core/src/test/resources/solr/oak/conf/schema.xml (working copy) @@ -137,7 +137,7 @@ - + @@ -152,7 +152,7 @@ - + path_exact