Index: contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java =================================================================== --- contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java (revision 982251) +++ contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java (working copy) @@ -27,6 +27,7 @@ import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.search.Query; +import org.apache.lucene.search.highlight.SimpleHTMLEncoder; public class SimpleFragmentsBuilderTest extends AbstractTestCase { @@ -59,6 +60,12 @@ assertEquals( "b b a b a b b b b b ", f[1] ); assertEquals( "c a a b b", f[2] ); } + + public void testEncoder() throws Exception { + FieldFragList ffl = ffl( "a", "<h1> a </h1>" ); + SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder(); + assertEquals( "&lt;h1&gt; <b>a</b> &lt;/h1&gt;", sfb.createFragment( reader, 0, F, ffl, new SimpleHTMLEncoder() ) ); + } private FieldFragList ffl( String queryValue, String indexValue ) throws Exception { make1d1fIndex( indexValue ); Index: contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FastVectorHighlighter.java =================================================================== --- contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FastVectorHighlighter.java (revision 982251) +++ contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FastVectorHighlighter.java (working copy) @@ -21,6 +21,7 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.Query; +import org.apache.lucene.search.highlight.Encoder; /** * Another highlighter implementation. @@ -43,7 +44,7 @@ } /** - * a constructor. Using SimpleFragListBuilder and ScoreOrderFragmentsBuilder. + * a constructor. Using {@link SimpleFragListBuilder} and {@link ScoreOrderFragmentsBuilder}. * * @param phraseHighlight true or false for phrase highlighting * @param fieldMatch true of false for field matching @@ -53,12 +54,12 @@ } /** - * a constructor. A FragListBuilder and a FragmentsBuilder can be specified (plugins). + * a constructor. A {@link FragListBuilder} and a {@link FragmentsBuilder} can be specified (plugins). * * @param phraseHighlight true of false for phrase highlighting * @param fieldMatch true of false for field matching - * @param fragListBuilder an instance of FragListBuilder - * @param fragmentsBuilder an instance of FragmentsBuilder + * @param fragListBuilder an instance of {@link FragListBuilder} + * @param fragmentsBuilder an instance of {@link FragmentsBuilder} */ public FastVectorHighlighter( boolean phraseHighlight, boolean fieldMatch, FragListBuilder fragListBuilder, FragmentsBuilder fragmentsBuilder ){ @@ -69,10 +70,10 @@ } /** - * create a FieldQuery object. 
+ * create a {@link FieldQuery} object. * * @param query a query - * @return the created FieldQuery object + * @return the created {@link FieldQuery} object */ public FieldQuery getFieldQuery( Query query ){ return new FieldQuery( query, phraseHighlight, fieldMatch ); @@ -81,8 +82,8 @@ /** * return the best fragment. * - * @param fieldQuery FieldQuery object - * @param reader IndexReader of the index + * @param fieldQuery {@link FieldQuery} object + * @param reader {@link IndexReader} of the index * @param docId document id to be highlighted * @param fieldName field of the document to be highlighted * @param fragCharSize the length (number of chars) of a fragment @@ -98,8 +99,8 @@ /** * return the best fragments. * - * @param fieldQuery FieldQuery object - * @param reader IndexReader of the index + * @param fieldQuery {@link FieldQuery} object + * @param reader {@link IndexReader} of the index * @param docId document id to be highlighted * @param fieldName field of the document to be highlighted * @param fragCharSize the length (number of chars) of a fragment @@ -113,6 +114,44 @@ FieldFragList fieldFragList = getFieldFragList( fieldQuery, reader, docId, fieldName, fragCharSize ); return fragmentsBuilder.createFragments( reader, docId, fieldName, fieldFragList, maxNumFragments ); } + + /** + * return the best fragment. 
+ * + * @param fieldQuery {@link FieldQuery} object + * @param reader {@link IndexReader} of the index + * @param docId document id to be highlighted + * @param fieldName field of the document to be highlighted + * @param fragCharSize the length (number of chars) of a fragment + * @param encoder an encoder that generates encoded text + * @return the best fragment (snippet) string + * @throws IOException + */ + public final String getBestFragment( final FieldQuery fieldQuery, IndexReader reader, int docId, + String fieldName, int fragCharSize, Encoder encoder ) throws IOException { + FieldFragList fieldFragList = getFieldFragList( fieldQuery, reader, docId, fieldName, fragCharSize ); + return fragmentsBuilder.createFragment( reader, docId, fieldName, fieldFragList, encoder ); + } + + /** + * return the best fragments. + * + * @param fieldQuery {@link FieldQuery} object + * @param reader {@link IndexReader} of the index + * @param docId document id to be highlighted + * @param fieldName field of the document to be highlighted + * @param fragCharSize the length (number of chars) of a fragment + * @param maxNumFragments maximum number of fragments + * @param encoder an encoder that generates encoded text + * @return created fragments or null when no fragments created. 
+ * size of the array can be less than maxNumFragments + * @throws IOException + */ + public final String[] getBestFragments( final FieldQuery fieldQuery, IndexReader reader, int docId, + String fieldName, int fragCharSize, int maxNumFragments, Encoder encoder ) throws IOException { + FieldFragList fieldFragList = getFieldFragList( fieldQuery, reader, docId, fieldName, fragCharSize ); + return fragmentsBuilder.createFragments( reader, docId, fieldName, fieldFragList, maxNumFragments, encoder ); + } private FieldFragList getFieldFragList( final FieldQuery fieldQuery, IndexReader reader, int docId, String fieldName, int fragCharSize ) throws IOException { Index: contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java =================================================================== --- contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java (revision 982251) +++ contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java (working copy) @@ -25,6 +25,8 @@ import org.apache.lucene.document.Field; import org.apache.lucene.document.MapFieldSelector; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.highlight.DefaultEncoder; +import org.apache.lucene.search.highlight.Encoder; import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo; import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo.SubInfo; import org.apache.lucene.search.vectorhighlight.FieldPhraseList.WeightedPhraseInfo.Toffs; @@ -59,16 +61,29 @@ } public abstract List getWeightedFragInfoList( List src ); + + private static final Encoder NULL_ENCODER = new DefaultEncoder(); public String createFragment( IndexReader reader, int docId, String fieldName, FieldFragList fieldFragList ) throws IOException { - String[] fragments = createFragments( reader, docId, fieldName, fieldFragList, 1 ); + return createFragment( reader, docId, 
fieldName, fieldFragList, NULL_ENCODER ); + } + + public String[] createFragments( IndexReader reader, int docId, + String fieldName, FieldFragList fieldFragList, int maxNumFragments ) + throws IOException { + return createFragments( reader, docId, fieldName, fieldFragList, maxNumFragments, NULL_ENCODER ); + } + + public String createFragment( IndexReader reader, int docId, + String fieldName, FieldFragList fieldFragList, Encoder encoder ) throws IOException { + String[] fragments = createFragments( reader, docId, fieldName, fieldFragList, 1, encoder ); if( fragments == null || fragments.length == 0 ) return null; return fragments[0]; } public String[] createFragments( IndexReader reader, int docId, - String fieldName, FieldFragList fieldFragList, int maxNumFragments ) + String fieldName, FieldFragList fieldFragList, int maxNumFragments, Encoder encoder ) throws IOException { if( maxNumFragments < 0 ) throw new IllegalArgumentException( "maxNumFragments(" + maxNumFragments + ") must be positive number." 
); @@ -82,7 +97,7 @@ int[] nextValueIndex = { 0 }; for( int n = 0; n < maxNumFragments && n < fragInfos.size(); n++ ){ WeightedFragInfo fragInfo = fragInfos.get( n ); - fragments.add( makeFragment( buffer, nextValueIndex, values, fragInfo ) ); + fragments.add( makeFragment( buffer, nextValueIndex, values, fragInfo, encoder ) ); } return fragments.toArray( new String[fragments.size()] ); } @@ -102,25 +117,28 @@ @Deprecated protected String makeFragment( StringBuilder buffer, int[] index, String[] values, WeightedFragInfo fragInfo ){ final int s = fragInfo.startOffset; - return makeFragment( fragInfo, getFragmentSource( buffer, index, values, s, fragInfo.endOffset ), s ); + return makeFragment( fragInfo, getFragmentSource( buffer, index, values, s, fragInfo.endOffset ), s, NULL_ENCODER ); } - protected String makeFragment( StringBuilder buffer, int[] index, Field[] values, WeightedFragInfo fragInfo ){ + protected String makeFragment( StringBuilder buffer, int[] index, Field[] values, WeightedFragInfo fragInfo, Encoder encoder ){ final int s = fragInfo.startOffset; - return makeFragment( fragInfo, getFragmentSource( buffer, index, values, s, fragInfo.endOffset ), s ); + return makeFragment( fragInfo, getFragmentSource( buffer, index, values, s, fragInfo.endOffset ), s, encoder ); } - private String makeFragment( WeightedFragInfo fragInfo, String src, int s ){ + private String makeFragment( WeightedFragInfo fragInfo, String src, int s, Encoder encoder ){ StringBuilder fragment = new StringBuilder(); int srcIndex = 0; for( SubInfo subInfo : fragInfo.subInfos ){ for( Toffs to : subInfo.termsOffsets ){ - fragment.append( src.substring( srcIndex, to.startOffset - s ) ).append( getPreTag( subInfo.seqnum ) ) - .append( src.substring( to.startOffset - s, to.endOffset - s ) ).append( getPostTag( subInfo.seqnum ) ); + fragment + .append( encoder.encodeText( src.substring( srcIndex, to.startOffset - s ) ) ) + .append( getPreTag( subInfo.seqnum ) ) + .append( encoder.encodeText( 
src.substring( to.startOffset - s, to.endOffset - s ) ) ) + .append( getPostTag( subInfo.seqnum ) ); srcIndex = to.endOffset - s; } } - fragment.append( src.substring( srcIndex ) ); + fragment.append( encoder.encodeText( src.substring( srcIndex ) ) ); return fragment.toString(); } Index: contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FragmentsBuilder.java =================================================================== --- contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FragmentsBuilder.java (revision 982251) +++ contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FragmentsBuilder.java (working copy) @@ -20,10 +20,12 @@ import java.io.IOException; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.highlight.Encoder; /** - * FragmentsBuilder is an interface for fragments (snippets) builder classes. - * A FragmentsBuilder class can be plugged in to Highlighter. + * {@link org.apache.lucene.search.vectorhighlight.FragmentsBuilder} is an interface for fragments (snippets) builder classes. + * A {@link org.apache.lucene.search.vectorhighlight.FragmentsBuilder} class can be plugged in to + * {@link org.apache.lucene.search.vectorhighlight.FastVectorHighlighter}. */ public interface FragmentsBuilder { @@ -54,4 +56,34 @@ */ public String[] createFragments( IndexReader reader, int docId, String fieldName, FieldFragList fieldFragList, int maxNumFragments ) throws IOException; + + /** + * create a fragment. 
+ * + * @param reader IndexReader of the index + * @param docId document id to be highlighted + * @param fieldName field of the document to be highlighted + * @param fieldFragList FieldFragList object + * @param encoder an encoder that generates encoded text + * @return a created fragment or null when no fragment created + * @throws IOException + */ + public String createFragment( IndexReader reader, int docId, String fieldName, + FieldFragList fieldFragList, Encoder encoder ) throws IOException; + + /** + * create multiple fragments. + * + * @param reader IndexReader of the index + * @param docId document id to be highlighted + * @param fieldName field of the document to be highlighted + * @param fieldFragList FieldFragList object + * @param maxNumFragments maximum number of fragments + * @param encoder an encoder that generates encoded text + * @return created fragments or null when no fragments created. + * size of the array can be less than maxNumFragments + * @throws IOException + */ + public String[] createFragments( IndexReader reader, int docId, String fieldName, + FieldFragList fieldFragList, int maxNumFragments, Encoder encoder ) throws IOException; }