Index: contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java
===================================================================
--- contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java (revision 982251)
+++ contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java (working copy)
@@ -27,6 +27,7 @@
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.Query;
+import org.apache.lucene.search.highlight.SimpleHTMLEncoder;
public class SimpleFragmentsBuilderTest extends AbstractTestCase {
@@ -59,6 +60,12 @@
assertEquals( "b b a b a b b b b b ", f[1] );
assertEquals( "c a a b b", f[2] );
}
+
+ public void testEncoder() throws Exception {
+ // the indexed value carries HTML markup so that the encoder has something to escape
+ FieldFragList ffl = ffl( "a", "<h1> a </h1>" );
+ SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
+ // stored text must come back escaped, while the highlight tags around the matched term stay literal
+ assertEquals( "&lt;h1&gt; <b>a</b> &lt;/h1&gt;", sfb.createFragment( reader, 0, F, ffl, new SimpleHTMLEncoder() ) );
+ }
private FieldFragList ffl( String queryValue, String indexValue ) throws Exception {
make1d1fIndex( indexValue );
Index: contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FastVectorHighlighter.java
===================================================================
--- contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FastVectorHighlighter.java (revision 982251)
+++ contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FastVectorHighlighter.java (working copy)
@@ -21,6 +21,7 @@
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Query;
+import org.apache.lucene.search.highlight.Encoder;
/**
* Another highlighter implementation.
@@ -43,7 +44,7 @@
}
/**
- * a constructor. Using SimpleFragListBuilder and ScoreOrderFragmentsBuilder.
+ * a constructor. Using {@link SimpleFragListBuilder} and {@link ScoreOrderFragmentsBuilder}.
*
* @param phraseHighlight true or false for phrase highlighting
* @param fieldMatch true of false for field matching
@@ -53,12 +54,12 @@
}
/**
- * a constructor. A FragListBuilder and a FragmentsBuilder can be specified (plugins).
+ * a constructor. A {@link FragListBuilder} and a {@link FragmentsBuilder} can be specified (plugins).
*
* @param phraseHighlight true of false for phrase highlighting
* @param fieldMatch true of false for field matching
- * @param fragListBuilder an instance of FragListBuilder
- * @param fragmentsBuilder an instance of FragmentsBuilder
+ * @param fragListBuilder an instance of {@link FragListBuilder}
+ * @param fragmentsBuilder an instance of {@link FragmentsBuilder}
*/
public FastVectorHighlighter( boolean phraseHighlight, boolean fieldMatch,
FragListBuilder fragListBuilder, FragmentsBuilder fragmentsBuilder ){
@@ -69,10 +70,10 @@
}
/**
- * create a FieldQuery object.
+ * create a {@link FieldQuery} object.
*
* @param query a query
- * @return the created FieldQuery object
+ * @return the created {@link FieldQuery} object
*/
public FieldQuery getFieldQuery( Query query ){
return new FieldQuery( query, phraseHighlight, fieldMatch );
@@ -81,8 +82,8 @@
/**
* return the best fragment.
*
- * @param fieldQuery FieldQuery object
- * @param reader IndexReader of the index
+ * @param fieldQuery {@link FieldQuery} object
+ * @param reader {@link IndexReader} of the index
* @param docId document id to be highlighted
* @param fieldName field of the document to be highlighted
* @param fragCharSize the length (number of chars) of a fragment
@@ -98,8 +99,8 @@
/**
* return the best fragments.
*
- * @param fieldQuery FieldQuery object
- * @param reader IndexReader of the index
+ * @param fieldQuery {@link FieldQuery} object
+ * @param reader {@link IndexReader} of the index
* @param docId document id to be highlighted
* @param fieldName field of the document to be highlighted
* @param fragCharSize the length (number of chars) of a fragment
@@ -113,6 +114,44 @@
FieldFragList fieldFragList = getFieldFragList( fieldQuery, reader, docId, fieldName, fragCharSize );
return fragmentsBuilder.createFragments( reader, docId, fieldName, fieldFragList, maxNumFragments );
}
+
+ /**
+ * return the best fragment, built by the configured {@link FragmentsBuilder} using the given {@link Encoder}.
+ *
+ * @param fieldQuery {@link FieldQuery} object
+ * @param reader {@link IndexReader} of the index
+ * @param docId document id to be highlighted
+ * @param fieldName field of the document to be highlighted
+ * @param fragCharSize the length (number of chars) of a fragment
+ * @param encoder an {@link Encoder} that generates encoded text
+ * @return the best fragment (snippet) string, or null when no fragment is created
+ * @throws IOException propagated from building the field fragment list or the fragment
+ */
+ public final String getBestFragment( final FieldQuery fieldQuery, IndexReader reader, int docId,
+ String fieldName, int fragCharSize, Encoder encoder ) throws IOException {
+ FieldFragList fieldFragList = getFieldFragList( fieldQuery, reader, docId, fieldName, fragCharSize );
+ return fragmentsBuilder.createFragment( reader, docId, fieldName, fieldFragList, encoder );
+ }
+
+ /**
+ * return the best fragments, built by the configured {@link FragmentsBuilder} using the given {@link Encoder}.
+ *
+ * @param fieldQuery {@link FieldQuery} object
+ * @param reader {@link IndexReader} of the index
+ * @param docId document id to be highlighted
+ * @param fieldName field of the document to be highlighted
+ * @param fragCharSize the length (number of chars) of a fragment
+ * @param maxNumFragments maximum number of fragments
+ * @param encoder an {@link Encoder} that generates encoded text
+ * @return created fragments or null when no fragments created.
+ * size of the array can be less than maxNumFragments
+ * @throws IOException propagated from building the field fragment list or the fragments
+ */
+ public final String[] getBestFragments( final FieldQuery fieldQuery, IndexReader reader, int docId,
+ String fieldName, int fragCharSize, int maxNumFragments, Encoder encoder ) throws IOException {
+ FieldFragList fieldFragList = getFieldFragList( fieldQuery, reader, docId, fieldName, fragCharSize );
+ return fragmentsBuilder.createFragments( reader, docId, fieldName, fieldFragList, maxNumFragments, encoder );
+ }
private FieldFragList getFieldFragList( final FieldQuery fieldQuery, IndexReader reader, int docId,
String fieldName, int fragCharSize ) throws IOException {
Index: contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java
===================================================================
--- contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java (revision 982251)
+++ contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java (working copy)
@@ -25,6 +25,8 @@
import org.apache.lucene.document.Field;
import org.apache.lucene.document.MapFieldSelector;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.search.highlight.DefaultEncoder;
+import org.apache.lucene.search.highlight.Encoder;
import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo;
import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo.SubInfo;
import org.apache.lucene.search.vectorhighlight.FieldPhraseList.WeightedPhraseInfo.Toffs;
@@ -59,16 +61,29 @@
}
public abstract List getWeightedFragInfoList( List src );
+
+ private static final Encoder NULL_ENCODER = new DefaultEncoder(); // shared encoder for the overloads that take no Encoder argument
public String createFragment( IndexReader reader, int docId,
String fieldName, FieldFragList fieldFragList ) throws IOException {
- String[] fragments = createFragments( reader, docId, fieldName, fieldFragList, 1 );
+ return createFragment( reader, docId, fieldName, fieldFragList, NULL_ENCODER ); // delegate to the Encoder overload
+ }
+
+ public String[] createFragments( IndexReader reader, int docId,
+ String fieldName, FieldFragList fieldFragList, int maxNumFragments )
+ throws IOException {
+ return createFragments( reader, docId, fieldName, fieldFragList, maxNumFragments, NULL_ENCODER ); // delegate to the Encoder overload
+ }
+
+ public String createFragment( IndexReader reader, int docId,
+ String fieldName, FieldFragList fieldFragList, Encoder encoder ) throws IOException {
+ String[] fragments = createFragments( reader, docId, fieldName, fieldFragList, 1, encoder ); // request at most one fragment
if( fragments == null || fragments.length == 0 ) return null;
return fragments[0];
}
public String[] createFragments( IndexReader reader, int docId,
- String fieldName, FieldFragList fieldFragList, int maxNumFragments )
+ String fieldName, FieldFragList fieldFragList, int maxNumFragments, Encoder encoder )
throws IOException {
if( maxNumFragments < 0 )
throw new IllegalArgumentException( "maxNumFragments(" + maxNumFragments + ") must be positive number." );
@@ -82,7 +97,7 @@
int[] nextValueIndex = { 0 };
for( int n = 0; n < maxNumFragments && n < fragInfos.size(); n++ ){
WeightedFragInfo fragInfo = fragInfos.get( n );
- fragments.add( makeFragment( buffer, nextValueIndex, values, fragInfo ) );
+ fragments.add( makeFragment( buffer, nextValueIndex, values, fragInfo, encoder ) );
}
return fragments.toArray( new String[fragments.size()] );
}
@@ -102,25 +117,28 @@
@Deprecated
protected String makeFragment( StringBuilder buffer, int[] index, String[] values, WeightedFragInfo fragInfo ){
final int s = fragInfo.startOffset;
- return makeFragment( fragInfo, getFragmentSource( buffer, index, values, s, fragInfo.endOffset ), s );
+ return makeFragment( fragInfo, getFragmentSource( buffer, index, values, s, fragInfo.endOffset ), s, NULL_ENCODER );
}
- protected String makeFragment( StringBuilder buffer, int[] index, Field[] values, WeightedFragInfo fragInfo ){
+ protected String makeFragment( StringBuilder buffer, int[] index, Field[] values, WeightedFragInfo fragInfo, Encoder encoder ){
final int s = fragInfo.startOffset;
- return makeFragment( fragInfo, getFragmentSource( buffer, index, values, s, fragInfo.endOffset ), s );
+ return makeFragment( fragInfo, getFragmentSource( buffer, index, values, s, fragInfo.endOffset ), s, encoder );
}
- private String makeFragment( WeightedFragInfo fragInfo, String src, int s ){
+ private String makeFragment( WeightedFragInfo fragInfo, String src, int s, Encoder encoder ){
StringBuilder fragment = new StringBuilder();
int srcIndex = 0;
for( SubInfo subInfo : fragInfo.subInfos ){
for( Toffs to : subInfo.termsOffsets ){
- fragment.append( src.substring( srcIndex, to.startOffset - s ) ).append( getPreTag( subInfo.seqnum ) )
- .append( src.substring( to.startOffset - s, to.endOffset - s ) ).append( getPostTag( subInfo.seqnum ) );
+ fragment
+ .append( encoder.encodeText( src.substring( srcIndex, to.startOffset - s ) ) ) // encoded text preceding this term; term offsets are shifted by s to index into src
+ .append( getPreTag( subInfo.seqnum ) ) // tag is appended as-is, it is not passed through the encoder
+ .append( encoder.encodeText( src.substring( to.startOffset - s, to.endOffset - s ) ) ) // the term text itself, encoded
+ .append( getPostTag( subInfo.seqnum ) ); // tag is appended as-is, it is not passed through the encoder
srcIndex = to.endOffset - s;
}
}
- fragment.append( src.substring( srcIndex ) );
+ fragment.append( encoder.encodeText( src.substring( srcIndex ) ) ); // encoded remainder of src after the last term
return fragment.toString();
}
Index: contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FragmentsBuilder.java
===================================================================
--- contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FragmentsBuilder.java (revision 982251)
+++ contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FragmentsBuilder.java (working copy)
@@ -20,10 +20,12 @@
import java.io.IOException;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.search.highlight.Encoder;
/**
- * FragmentsBuilder is an interface for fragments (snippets) builder classes.
- * A FragmentsBuilder class can be plugged in to Highlighter.
+ * {@link org.apache.lucene.search.vectorhighlight.FragmentsBuilder} is an interface for fragments (snippets) builder classes.
+ * A {@link org.apache.lucene.search.vectorhighlight.FragmentsBuilder} class can be plugged in to
+ * {@link org.apache.lucene.search.vectorhighlight.FastVectorHighlighter}.
*/
public interface FragmentsBuilder {
@@ -54,4 +56,34 @@
*/
public String[] createFragments( IndexReader reader, int docId, String fieldName,
FieldFragList fieldFragList, int maxNumFragments ) throws IOException;
+
+ /**
+ * create a fragment, using the given {@link Encoder}.
+ *
+ * @param reader {@link IndexReader} of the index
+ * @param docId document id to be highlighted
+ * @param fieldName field of the document to be highlighted
+ * @param fieldFragList {@link FieldFragList} object
+ * @param encoder an {@link Encoder} that generates encoded text
+ * @return a created fragment or null when no fragment created
+ * @throws IOException if the implementation fails with an I/O error
+ */
+ public String createFragment( IndexReader reader, int docId, String fieldName,
+ FieldFragList fieldFragList, Encoder encoder ) throws IOException;
+
+ /**
+ * create multiple fragments, using the given {@link Encoder}.
+ *
+ * @param reader {@link IndexReader} of the index
+ * @param docId document id to be highlighted
+ * @param fieldName field of the document to be highlighted
+ * @param fieldFragList {@link FieldFragList} object
+ * @param maxNumFragments maximum number of fragments
+ * @param encoder an {@link Encoder} that generates encoded text
+ * @return created fragments or null when no fragments created.
+ * size of the array can be less than maxNumFragments
+ * @throws IOException if the implementation fails with an I/O error
+ */
+ public String[] createFragments( IndexReader reader, int docId, String fieldName,
+ FieldFragList fieldFragList, int maxNumFragments, Encoder encoder ) throws IOException;
}