Index: lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighterRanking.java
===================================================================
--- lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighterRanking.java	(revision 1459488)
+++ lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighterRanking.java	(working copy)
@@ -112,16 +112,26 @@
   private void checkQuery(IndexSearcher is, Query query, int doc, int maxTopN) throws IOException {
     for (int n = 1; n < maxTopN; n++) {
-      FakePassageFormatter f1 = new FakePassageFormatter();
+      final FakePassageFormatter f1 = new FakePassageFormatter();
       PostingsHighlighter p1 = new PostingsHighlighter(Integer.MAX_VALUE-1,
-                                                       BreakIterator.getSentenceInstance(Locale.ROOT),
-                                                       new PassageScorer(),
-                                                       f1);
-      FakePassageFormatter f2 = new FakePassageFormatter();
+                                                       BreakIterator.getSentenceInstance(Locale.ROOT)) {
+          @Override
+          protected PassageFormatter getFormatter(String field) {
+            assertEquals("body", field);
+            return f1;
+          }
+        };
+
+      final FakePassageFormatter f2 = new FakePassageFormatter();
       PostingsHighlighter p2 = new PostingsHighlighter(Integer.MAX_VALUE-1,
-                                                       BreakIterator.getSentenceInstance(Locale.ROOT),
-                                                       new PassageScorer(),
-                                                       f2);
+                                                       BreakIterator.getSentenceInstance(Locale.ROOT)) {
+          @Override
+          protected PassageFormatter getFormatter(String field) {
+            assertEquals("body", field);
+            return f2;
+          }
+        };
+
       BooleanQuery bq = new BooleanQuery(false);
       bq.add(query, BooleanClause.Occur.MUST);
       bq.add(new TermQuery(new Term("id", Integer.toString(doc))), BooleanClause.Occur.MUST);
@@ -170,8 +180,7 @@
       // we use a very simple analyzer. so we can assert the matches are correct
       int lastMatchStart = -1;
       for (int i = 0; i < p.getNumMatches(); i++) {
-        Term term = p.getMatchTerms()[i];
-        assertEquals("body", term.field());
+        BytesRef term = p.getMatchTerms()[i];
         int matchStart = p.getMatchStarts()[i];
         assertTrue(matchStart >= 0);
         // must at least start within the passage
@@ -184,9 +193,8 @@
         // single character terms
         assertEquals(matchStart+1, matchEnd);
         // and the offsets must be correct...
-        BytesRef bytes = term.bytes();
-        assertEquals(1, bytes.length);
-        assertEquals((char)bytes.bytes[bytes.offset], Character.toLowerCase(content.charAt(matchStart)));
+        assertEquals(1, term.length);
+        assertEquals((char)term.bytes[term.offset], Character.toLowerCase(content.charAt(matchStart)));
       }
       // record just the start/end offset for simplicity
       seen.add(new Pair(p.getStartOffset(), p.getEndOffset()));
@@ -262,9 +270,12 @@
 
     IndexSearcher searcher = newSearcher(ir);
     PostingsHighlighter highlighter = new PostingsHighlighter(10000,
-        BreakIterator.getSentenceInstance(Locale.ROOT),
-        new PassageScorer(1.2f, 0, 87),
-        new PassageFormatter());
+        BreakIterator.getSentenceInstance(Locale.ROOT)) {
+        @Override
+        protected PassageScorer getScorer(String field) {
+          return new PassageScorer(1.2f, 0, 87);
+        }
+      };
     Query query = new TermQuery(new Term("body", "test"));
     TopDocs topDocs = searcher.search(query, null, 10, Sort.INDEXORDER);
     assertEquals(1, topDocs.totalHits);
@@ -299,9 +310,12 @@
 
     IndexSearcher searcher = newSearcher(ir);
     PostingsHighlighter highlighter = new PostingsHighlighter(10000,
-        BreakIterator.getSentenceInstance(Locale.ROOT),
-        new PassageScorer(0, 0.75f, 87),
-        new PassageFormatter());
+        BreakIterator.getSentenceInstance(Locale.ROOT)) {
+        @Override
+        protected PassageScorer getScorer(String field) {
+          return new PassageScorer(0, 0.75f, 87);
+        }
+      };
     BooleanQuery query = new BooleanQuery();
     query.add(new TermQuery(new Term("body", "foo")), BooleanClause.Occur.SHOULD);
     query.add(new TermQuery(new Term("body", "bar")), BooleanClause.Occur.SHOULD);
Index: lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java
===================================================================
--- lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java	(revision 1459488)
+++ lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java	(working copy)
@@ -457,7 +457,7 @@
     iw.close();
 
     IndexSearcher searcher = newSearcher(ir);
-    PostingsHighlighter highlighter = new PostingsHighlighter(10000, null, new PassageScorer(), new PassageFormatter());
+    PostingsHighlighter highlighter = new PostingsHighlighter(10000, null);
     Query query = new TermQuery(new Term("body", "test"));
     TopDocs topDocs = searcher.search(query, null, 10, Sort.INDEXORDER);
     assertEquals(1, topDocs.totalHits);
@@ -527,7 +527,7 @@
 
     IndexSearcher searcher = newSearcher(ir);
 
-    PostingsHighlighter highlighter = new PostingsHighlighter(10000, null, new PassageScorer(), new PassageFormatter()) {
+    PostingsHighlighter highlighter = new PostingsHighlighter(10000, null) {
         @Override
         protected String[][] loadFieldValues(IndexSearcher searcher, String[] fields, int[] docids, int maxLength) throws IOException {
           assert fields.length == 1;
@@ -636,7 +636,7 @@
     iw.close();
 
     IndexSearcher searcher = newSearcher(ir);
-    PostingsHighlighter highlighter = new PostingsHighlighter(10000, null, new PassageScorer(), new PassageFormatter());
+    PostingsHighlighter highlighter = new PostingsHighlighter(10000, null);
    Query query = new TermQuery(new Term("body", "highlighting"));
    int[] docIDs = new int[] {0};
    String snippets[] = highlighter.highlightFields(new String[] {"body"}, query, searcher, docIDs, 2).get("body");
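With this patch the four-argument PostingsHighlighter constructor is gone: code that injected a
custom PassageScorer or PassageFormatter moves to the protected per-field hooks, exactly as the
updated tests above do. A minimal migration sketch, assuming only the API shown in this patch
(the class and method names are illustrative, not part of the patch):

    import java.text.BreakIterator;
    import java.util.Locale;
    import org.apache.lucene.search.postingshighlight.PassageFormatter;
    import org.apache.lucene.search.postingshighlight.PostingsHighlighter;

    class MigrationSketch {
      static PostingsHighlighter create() {
        // Before this patch: new PostingsHighlighter(10000, breakIterator, scorer, formatter)
        // After: two-argument constructor plus an overridden per-field hook.
        return new PostingsHighlighter(10000, BreakIterator.getSentenceInstance(Locale.ROOT)) {
          @Override
          protected PassageFormatter getFormatter(String field) {
            return new PassageFormatter();  // substitute a custom formatter here
          }
        };
      }
    }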
Index: lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/Passage.java
===================================================================
--- lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/Passage.java	(revision 1459488)
+++ lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/Passage.java	(working copy)
@@ -17,8 +17,8 @@
  * limitations under the License.
  */
 
-import org.apache.lucene.index.Term;
 import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util.SorterTemplate;
 
@@ -36,15 +36,15 @@
   int matchStarts[] = new int[8];
   int matchEnds[] = new int[8];
-  Term matchTerms[] = new Term[8];
+  BytesRef matchTerms[] = new BytesRef[8];
   int numMatches = 0;
 
-  void addMatch(int startOffset, int endOffset, Term term) {
+  void addMatch(int startOffset, int endOffset, BytesRef term) {
     assert startOffset >= this.startOffset && startOffset <= this.endOffset;
     if (numMatches == matchStarts.length) {
       matchStarts = ArrayUtil.grow(matchStarts, numMatches+1);
       matchEnds = ArrayUtil.grow(matchEnds, numMatches+1);
-      Term newMatchTerms[] = new Term[ArrayUtil.oversize(numMatches+1, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
+      BytesRef newMatchTerms[] = new BytesRef[ArrayUtil.oversize(numMatches+1, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
       System.arraycopy(matchTerms, 0, newMatchTerms, 0, numMatches);
       matchTerms = newMatchTerms;
     }
@@ -57,7 +57,7 @@
   void sort() {
     final int starts[] = matchStarts;
     final int ends[] = matchEnds;
-    final Term terms[] = matchTerms;
+    final BytesRef terms[] = matchTerms;
     new SorterTemplate() {
       @Override
       protected void swap(int i, int j) {
@@ -69,7 +69,7 @@
         ends[i] = ends[j];
         ends[j] = temp;
 
-        Term tempTerm = terms[i];
+        BytesRef tempTerm = terms[i];
         terms[i] = terms[j];
         terms[j] = tempTerm;
       }
@@ -155,11 +155,11 @@
   }
 
   /**
-   * Term of the matches, corresponding with {@link #getMatchStarts()}.
+   * BytesRef (term text) of the matches, corresponding with {@link #getMatchStarts()}.
    * <p>
    * Only {@link #getNumMatches()} are valid.
    */
-  public Term[] getMatchTerms() {
+  public BytesRef[] getMatchTerms() {
     return matchTerms;
   }
 }
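For API consumers, the effect of the Passage change is that getMatchTerms() now exposes raw term
bytes (BytesRef) with the field stripped, since every match in a Passage comes from a single field
anyway. A small sketch of reading matches through the new accessors (the PassageDump class is
invented for this example; only the Passage getters shown in the patch are assumed):

    import org.apache.lucene.search.postingshighlight.Passage;
    import org.apache.lucene.util.BytesRef;

    class PassageDump {
      // Prints each match of a passage; the caller supplies the field name,
      // which BytesRef-based match terms no longer carry.
      static void dump(String field, Passage passage) {
        for (int i = 0; i < passage.getNumMatches(); i++) {
          BytesRef term = passage.getMatchTerms()[i];
          System.out.println(field + ":" + term.utf8ToString()
              + " [" + passage.getMatchStarts()[i] + "-" + passage.getMatchEnds()[i] + ")");
        }
      }
    }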
Index: lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java
===================================================================
--- lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java	(revision 1459488)
+++ lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java	(working copy)
@@ -97,8 +97,14 @@
   private final int maxLength;
   private final BreakIterator breakIterator;
-  private final PassageScorer scorer;
-  private final PassageFormatter formatter;
+
+  /** Set the first time {@link #getFormatter} is called,
+   *  and then reused. */
+  private PassageFormatter defaultFormatter;
+
+  /** Set the first time {@link #getScorer} is called,
+   *  and then reused. */
+  private PassageScorer defaultScorer;
 
   /**
    * Creates a new highlighter with default parameters.
@@ -113,7 +119,7 @@
    * @throws IllegalArgumentException if maxLength is negative or Integer.MAX_VALUE
    */
   public PostingsHighlighter(int maxLength) {
-    this(maxLength, BreakIterator.getSentenceInstance(Locale.ROOT), new PassageScorer(), new PassageFormatter());
+    this(maxLength, BreakIterator.getSentenceInstance(Locale.ROOT));
   }
 
   /**
@@ -122,11 +128,9 @@
    * @param breakIterator used for finding passage
    *        boundaries; pass null to highlight the entire
    *        content as a single Passage.
-   * @param scorer used for ranking passages.
-   * @param formatter used for formatting passages into highlighted snippets.
    * @throws IllegalArgumentException if maxLength is negative or Integer.MAX_VALUE
    */
-  public PostingsHighlighter(int maxLength, BreakIterator breakIterator, PassageScorer scorer, PassageFormatter formatter) {
+  public PostingsHighlighter(int maxLength, BreakIterator breakIterator) {
     if (maxLength < 0 || maxLength == Integer.MAX_VALUE) {
       // two reasons: no overflow problems in BreakIterator.preceding(offset+1),
       // our sentinel in the offsets queue uses this value to terminate.
@@ -135,15 +139,32 @@
     if (breakIterator == null) {
       breakIterator = new WholeBreakIterator();
     }
-    if (scorer == null || formatter == null) {
-      throw new NullPointerException();
-    }
     this.maxLength = maxLength;
     this.breakIterator = breakIterator;
-    this.scorer = scorer;
-    this.formatter = formatter;
   }
 
+  /** Returns the {@link PassageFormatter} to use for
+   *  formatting passages into highlighted snippets. This
+   *  returns a new {@code PassageFormatter} by default;
+   *  subclasses can override to customize. */
+  protected PassageFormatter getFormatter(String field) {
+    if (defaultFormatter == null) {
+      defaultFormatter = new PassageFormatter();
+    }
+    return defaultFormatter;
+  }
+
+  /** Returns the {@link PassageScorer} to use for
+   *  ranking passages. This
+   *  returns a new {@code PassageScorer} by default;
+   *  subclasses can override to customize. */
+  protected PassageScorer getScorer(String field) {
+    if (defaultScorer == null) {
+      defaultScorer = new PassageScorer();
+    }
+    return defaultScorer;
+  }
+
   /**
    * Highlights the top passages from a single field.
    *
@@ -302,7 +323,13 @@
       Term ceiling = new Term(field, UnicodeUtil.BIG_TERM);
       SortedSet<Term> fieldTerms = queryTerms.subSet(floor, ceiling);
       // TODO: should we have some reasonable defaults for term pruning? (e.g. stopwords)
-      Term terms[] = fieldTerms.toArray(new Term[fieldTerms.size()]);
+
+      // Strip off the redundant field:
+      BytesRef terms[] = new BytesRef[fieldTerms.size()];
+      int termUpto = 0;
+      for(Term term : fieldTerms) {
+        terms[termUpto++] = term.bytes();
+      }
       Map<Integer,String> fieldHighlights = highlightField(field, contents[i], bi, terms, docids, leaves, maxPassages);
 
       String[] result = new String[docids.length];
@@ -333,7 +360,7 @@
     return contents;
   }
 
-  private Map<Integer,String> highlightField(String field, String contents[], BreakIterator bi, Term terms[], int[] docids, List<AtomicReaderContext> leaves, int maxPassages) throws IOException {
+  private Map<Integer,String> highlightField(String field, String contents[], BreakIterator bi, BytesRef terms[], int[] docids, List<AtomicReaderContext> leaves, int maxPassages) throws IOException {
     Map<Integer,String> highlights = new HashMap<Integer,String>();
 
     // reuse in the real sense... for docs in same segment we just advance our old enum
@@ -341,6 +368,11 @@
     TermsEnum termsEnum = null;
     int lastLeaf = -1;
+
+    PassageFormatter fieldFormatter = getFormatter(field);
+    if (fieldFormatter == null) {
+      throw new NullPointerException("PassageFormatter cannot be null");
+    }
 
     for (int i = 0; i < docids.length; i++) {
       String content = contents[i];
       if (content.length() == 0) {
@@ -366,7 +398,7 @@
       if (passages.length > 0) {
         // otherwise a null snippet (eg if field is missing
         // entirely from the doc)
-        highlights.put(doc, formatter.format(passages, content));
+        highlights.put(doc, fieldFormatter.format(passages, content));
       }
       lastLeaf = leaf;
     }
@@ -377,8 +409,12 @@
   // algorithm: treat sentence snippets as miniature documents
   // we can intersect these with the postings lists via BreakIterator.preceding(offset)
   // score each sentence as norm(sentenceStartOffset) * sum(weight * tf(freq))
-  private Passage[] highlightDoc(String field, Term terms[], int contentLength, BreakIterator bi, int doc,
+  private Passage[] highlightDoc(String field, BytesRef terms[], int contentLength, BreakIterator bi, int doc,
       TermsEnum termsEnum, DocsAndPositionsEnum[] postings, int n) throws IOException {
+    PassageScorer scorer = getScorer(field);
+    if (scorer == null) {
+      throw new NullPointerException("PassageScorer cannot be null");
+    }
     PriorityQueue<Passage> pq = new PriorityQueue<Passage>();
     float weights[] = new float[terms.length];
     // initialize postings
@@ -389,7 +425,7 @@
         continue;
       } else if (de == null) {
         postings[i] = EMPTY; // initially
-        if (!termsEnum.seekExact(terms[i].bytes(), true)) {
+        if (!termsEnum.seekExact(terms[i], true)) {
          continue; // term not found
        }
        de = postings[i] = termsEnum.docsAndPositions(null, null, DocsAndPositionsEnum.FLAG_OFFSETS);
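Taken together, the PostingsHighlighter changes replace constructor injection of
PassageScorer/PassageFormatter with lazily-initialized defaults plus per-field protected hooks.
A hedged usage sketch of the new extension point (the "title" field and the
PassageScorer(1.2f, 0.75f, 64) values are arbitrary examples, not values from the patch):

    import java.text.BreakIterator;
    import java.util.Locale;
    import org.apache.lucene.search.postingshighlight.PassageScorer;
    import org.apache.lucene.search.postingshighlight.PostingsHighlighter;

    class PerFieldScorerSketch {
      static PostingsHighlighter create() {
        return new PostingsHighlighter(10000, BreakIterator.getSentenceInstance(Locale.ROOT)) {
          @Override
          protected PassageScorer getScorer(String field) {
            if ("title".equals(field)) {
              // Shorter pivot for a short field; the constructor arguments follow
              // the (k1, b, pivot) pattern used in the tests above.
              return new PassageScorer(1.2f, 0.75f, 64);
            }
            return super.getScorer(field);  // lazily-created shared default
          }
        };
      }
    }

Because getScorer and getFormatter receive the field name, a single highlighter instance can now
vary scoring and formatting per field, which the removed constructor could not express.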