Index: lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java
===================================================================
--- lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java (revision 1457977)
+++ lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java (working copy)
@@ -18,20 +18,24 @@
  */
 
 import java.io.BufferedReader;
+import java.io.IOException;
 import java.io.InputStreamReader;
 import java.util.Map;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.document.BinaryDocValuesField;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.StringField;
 import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.BinaryDocValues;
 import org.apache.lucene.index.FieldInfo.IndexOptions;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.MultiDocValues;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.BooleanClause;
@@ -43,6 +47,7 @@
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
 import org.apache.lucene.util.LuceneTestCase;
 
@@ -465,4 +470,48 @@
     ir.close();
     dir.close();
   }
+
+  /** Checks that a subclass can feed text to the highlighter through loadFieldValues when the field is not stored. */
+  public void testCustomFieldValueSource() throws Exception {
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));
+    iwc.setMergePolicy(newLogMergePolicy());
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
+
+    Document doc = new Document();
+
+    FieldType offsetsType = new FieldType(TextField.TYPE_NOT_STORED);
+    offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+    final String text = "This is a test. Just highlighting from postings. This is also a much sillier test. Feel free to test test test test test test test.";
+    Field body = new Field("body", text, offsetsType);
+    doc.add(body);
+    iw.addDocument(doc);
+
+    IndexReader ir = iw.getReader();
+    iw.close();
+
+    IndexSearcher searcher = newSearcher(ir);
+
+    PostingsHighlighter highlighter = new PostingsHighlighter(10000, null, new PassageScorer(), new PassageFormatter()) {
+      @Override
+      protected String[][] loadFieldValues(IndexSearcher searcher, String[] fields, int[] docids, int maxLength) throws IOException {
+        assert fields.length == 1;
+        assert docids.length == 1;
+        String[][] contents = new String[1][1];
+        contents[0][0] = text;
+        return contents;
+      }
+    };
+
+    Query query = new TermQuery(new Term("body", "test"));
+    TopDocs topDocs = searcher.search(query, null, 10, Sort.INDEXORDER);
+    assertEquals(1, topDocs.totalHits);
+    String snippets[] = highlighter.highlight("body", query, searcher, topDocs, 2);
+    assertEquals(1, snippets.length);
+    // The default PassageFormatter wraps every matched term in <b>...</b>.
+    assertEquals("This is a <b>test</b>. Just highlighting from postings. This is also a much sillier <b>test</b>. Feel free to <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b>.", snippets[0]);
+
+    ir.close();
+    dir.close();
+  }
 }
Index: lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java
===================================================================
--- lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java (revision 1457977)
+++ lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java (working copy)
@@ -81,7 +81,7 @@
  * This is thread-safe, and can be used across different readers.
  * @lucene.experimental
  */
-public final class PostingsHighlighter {
+public class PostingsHighlighter {
 
   // TODO: maybe allow re-analysis for tiny fields? currently we require offsets,
   // but if the analyzer is really fast and the field is tiny, this might really be
@@ -257,15 +257,7 @@
     Arrays.sort(fields);
 
     // pull stored data:
-    LimitedStoredFieldVisitor visitor = new LimitedStoredFieldVisitor(fields, maxLength);
-    String contents[][] = new String[fields.length][docids.length];
-    for (int i = 0; i < docids.length; i++) {
-      searcher.doc(docids[i], visitor);
-      for (int j = 0; j < fields.length; j++) {
-        contents[j][i] = visitor.getValue(j).toString();
-      }
-      visitor.reset();
-    }
+    String[][] contents = loadFieldValues(searcher, fields, docids, maxLength);
 
     Map highlights = new HashMap();
     for (int i = 0; i < fields.length; i++) {
@@ -285,6 +277,25 @@
     }
     return highlights;
   }
+
+  /** Loads the String values for each field X docID to be
+   *  highlighted. By default this loads from stored
+   *  fields, but a subclass can change the source. This
+   *  method should allocate the String[fields.length][docids.length]
+   *  and fill all values. The returned Strings must be
+   *  identical to what was indexed. */
+  protected String[][] loadFieldValues(IndexSearcher searcher, String[] fields, int[] docids, int maxLength) throws IOException {
+    String contents[][] = new String[fields.length][docids.length];
+    LimitedStoredFieldVisitor visitor = new LimitedStoredFieldVisitor(fields, maxLength);
+    for (int i = 0; i < docids.length; i++) {
+      searcher.doc(docids[i], visitor);
+      for (int j = 0; j < fields.length; j++) {
+        contents[j][i] = visitor.getValue(j).toString();
+      }
+      visitor.reset();
+    }
+    return contents;
+  }
 
   private Map highlightField(String field, String contents[], BreakIterator bi, Term terms[], int[] docids, List leaves, int maxPassages) throws IOException {
     Map highlights = new HashMap();