Index: contrib/CHANGES.txt
===================================================================
--- contrib/CHANGES.txt (revision 982424)
+++ contrib/CHANGES.txt (working copy)
@@ -43,6 +43,9 @@
* LUCENE-2323: Moved contrib/wikipedia functionality into contrib/analyzers.
Additionally the package was changed from org.apache.lucene.wikipedia.analysis
to org.apache.lucene.analysis.wikipedia. (Robert Muir)
+
+ * LUCENE-2581: Added methods to FragmentsBuilder interface. These methods
+ are used to set pre/post tags and Encoder. (Koji Sekiguchi)
Changes in runtime behavior
@@ -190,6 +193,9 @@
* LUCENE-2503: Added lighter stemming alternatives for European languages.
(Robert Muir)
+ * LUCENE-2581: FastVectorHighlighter: add Encoder to FragmentsBuilder.
+ (Koji Sekiguchi)
+
Build
* LUCENE-2124: Moved the JDK-based collation support from contrib/collation
Index: contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java
===================================================================
--- contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java (revision 982424)
+++ contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java (working copy)
@@ -27,6 +27,7 @@
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.Query;
+import org.apache.lucene.search.highlight.SimpleHTMLEncoder;
public class SimpleFragmentsBuilderTest extends AbstractTestCase {
@@ -59,6 +60,15 @@
assertEquals( "b b a b a b b b b b ", f[1] );
assertEquals( "c a a b b", f[2] );
}
+
+ public void testTagsAndEncoder() throws Exception {
+ FieldFragList ffl = ffl( "a", "
a
" );
+ SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
+ String[] preTags = { "[" };
+ String[] postTags = { "]" };
+ assertEquals( "<h1> [a] </h1>",
+ sfb.createFragment( reader, 0, F, ffl, preTags, postTags, new SimpleHTMLEncoder() ) );
+ }
private FieldFragList ffl( String queryValue, String indexValue ) throws Exception {
make1d1fIndex( indexValue );
Index: contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FastVectorHighlighter.java
===================================================================
--- contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FastVectorHighlighter.java (revision 982424)
+++ contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FastVectorHighlighter.java (working copy)
@@ -21,6 +21,7 @@
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Query;
+import org.apache.lucene.search.highlight.Encoder;
/**
* Another highlighter implementation.
@@ -43,7 +44,7 @@
}
/**
- * a constructor. Using SimpleFragListBuilder and ScoreOrderFragmentsBuilder.
+ * a constructor. Using {@link SimpleFragListBuilder} and {@link ScoreOrderFragmentsBuilder}.
*
* @param phraseHighlight true or false for phrase highlighting
* @param fieldMatch true of false for field matching
@@ -53,12 +54,12 @@
}
/**
- * a constructor. A FragListBuilder and a FragmentsBuilder can be specified (plugins).
+ * a constructor. A {@link FragListBuilder} and a {@link FragmentsBuilder} can be specified (plugins).
*
* @param phraseHighlight true of false for phrase highlighting
* @param fieldMatch true of false for field matching
- * @param fragListBuilder an instance of FragListBuilder
- * @param fragmentsBuilder an instance of FragmentsBuilder
+ * @param fragListBuilder an instance of {@link FragListBuilder}
+ * @param fragmentsBuilder an instance of {@link FragmentsBuilder}
*/
public FastVectorHighlighter( boolean phraseHighlight, boolean fieldMatch,
FragListBuilder fragListBuilder, FragmentsBuilder fragmentsBuilder ){
@@ -69,10 +70,10 @@
}
/**
- * create a FieldQuery object.
+ * create a {@link FieldQuery} object.
*
* @param query a query
- * @return the created FieldQuery object
+ * @return the created {@link FieldQuery} object
*/
public FieldQuery getFieldQuery( Query query ){
return new FieldQuery( query, phraseHighlight, fieldMatch );
@@ -81,8 +82,8 @@
/**
* return the best fragment.
*
- * @param fieldQuery FieldQuery object
- * @param reader IndexReader of the index
+ * @param fieldQuery {@link FieldQuery} object
+ * @param reader {@link IndexReader} of the index
* @param docId document id to be highlighted
* @param fieldName field of the document to be highlighted
* @param fragCharSize the length (number of chars) of a fragment
@@ -98,8 +99,8 @@
/**
* return the best fragments.
*
- * @param fieldQuery FieldQuery object
- * @param reader IndexReader of the index
+ * @param fieldQuery {@link FieldQuery} object
+ * @param reader {@link IndexReader} of the index
* @param docId document id to be highlighted
* @param fieldName field of the document to be highlighted
* @param fragCharSize the length (number of chars) of a fragment
@@ -113,6 +114,51 @@
FieldFragList fieldFragList = getFieldFragList( fieldQuery, reader, docId, fieldName, fragCharSize );
return fragmentsBuilder.createFragments( reader, docId, fieldName, fieldFragList, maxNumFragments );
}
+
+ /**
+ * return the best fragment.
+ *
+ * @param fieldQuery {@link FieldQuery} object
+ * @param reader {@link IndexReader} of the index
+ * @param docId document id to be highlighted
+ * @param fieldName field of the document to be highlighted
+ * @param fragCharSize the length (number of chars) of a fragment
+ * @param preTags pre-tags to be used to highlight terms
+ * @param postTags post-tags to be used to highlight terms
+ * @param encoder an encoder that generates encoded text
+ * @return the best fragment (snippet) string
+ * @throws IOException
+ */
+ public final String getBestFragment( final FieldQuery fieldQuery, IndexReader reader, int docId,
+ String fieldName, int fragCharSize, String[] preTags, String[] postTags,
+ Encoder encoder ) throws IOException {
+ FieldFragList fieldFragList = getFieldFragList( fieldQuery, reader, docId, fieldName, fragCharSize );
+ return fragmentsBuilder.createFragment( reader, docId, fieldName, fieldFragList, preTags, postTags, encoder );
+ }
+
+ /**
+ * return the best fragments.
+ *
+ * @param fieldQuery {@link FieldQuery} object
+ * @param reader {@link IndexReader} of the index
+ * @param docId document id to be highlighted
+ * @param fieldName field of the document to be highlighted
+ * @param fragCharSize the length (number of chars) of a fragment
+ * @param maxNumFragments maximum number of fragments
+ * @param preTags pre-tags to be used to highlight terms
+ * @param postTags post-tags to be used to highlight terms
+ * @param encoder an encoder that generates encoded text
+ * @return created fragments or null when no fragments created.
+ * size of the array can be less than maxNumFragments
+ * @throws IOException
+ */
+ public final String[] getBestFragments( final FieldQuery fieldQuery, IndexReader reader, int docId,
+ String fieldName, int fragCharSize, int maxNumFragments, String[] preTags, String[] postTags,
+ Encoder encoder ) throws IOException {
+ FieldFragList fieldFragList = getFieldFragList( fieldQuery, reader, docId, fieldName, fragCharSize );
+ return fragmentsBuilder.createFragments( reader, docId, fieldName, fieldFragList, maxNumFragments,
+ preTags, postTags, encoder );
+ }
private FieldFragList getFieldFragList( final FieldQuery fieldQuery, IndexReader reader, int docId,
String fieldName, int fragCharSize ) throws IOException {
Index: contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java
===================================================================
--- contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java (revision 982424)
+++ contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java (working copy)
@@ -25,6 +25,8 @@
import org.apache.lucene.document.Field;
import org.apache.lucene.document.MapFieldSelector;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.search.highlight.DefaultEncoder;
+import org.apache.lucene.search.highlight.Encoder;
import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo;
import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo.SubInfo;
import org.apache.lucene.search.vectorhighlight.FieldPhraseList.WeightedPhraseInfo.Toffs;
@@ -59,17 +61,34 @@
}
public abstract List getWeightedFragInfoList( List src );
+
+ private static final Encoder NULL_ENCODER = new DefaultEncoder();
public String createFragment( IndexReader reader, int docId,
String fieldName, FieldFragList fieldFragList ) throws IOException {
- String[] fragments = createFragments( reader, docId, fieldName, fieldFragList, 1 );
+ return createFragment( reader, docId, fieldName, fieldFragList,
+ preTags, postTags, NULL_ENCODER );
+ }
+
+ public String[] createFragments( IndexReader reader, int docId,
+ String fieldName, FieldFragList fieldFragList, int maxNumFragments )
+ throws IOException {
+ return createFragments( reader, docId, fieldName, fieldFragList, maxNumFragments,
+ preTags, postTags, NULL_ENCODER );
+ }
+
+ public String createFragment( IndexReader reader, int docId,
+ String fieldName, FieldFragList fieldFragList, String[] preTags, String[] postTags,
+ Encoder encoder ) throws IOException {
+ String[] fragments = createFragments( reader, docId, fieldName, fieldFragList, 1,
+ preTags, postTags, encoder );
if( fragments == null || fragments.length == 0 ) return null;
return fragments[0];
}
public String[] createFragments( IndexReader reader, int docId,
- String fieldName, FieldFragList fieldFragList, int maxNumFragments )
- throws IOException {
+ String fieldName, FieldFragList fieldFragList, int maxNumFragments,
+ String[] preTags, String[] postTags, Encoder encoder ) throws IOException {
if( maxNumFragments < 0 )
throw new IllegalArgumentException( "maxNumFragments(" + maxNumFragments + ") must be positive number." );
@@ -82,7 +101,7 @@
int[] nextValueIndex = { 0 };
for( int n = 0; n < maxNumFragments && n < fragInfos.size(); n++ ){
WeightedFragInfo fragInfo = fragInfos.get( n );
- fragments.add( makeFragment( buffer, nextValueIndex, values, fragInfo ) );
+ fragments.add( makeFragment( buffer, nextValueIndex, values, fragInfo, preTags, postTags, encoder ) );
}
return fragments.toArray( new String[fragments.size()] );
}
@@ -102,25 +121,32 @@
@Deprecated
protected String makeFragment( StringBuilder buffer, int[] index, String[] values, WeightedFragInfo fragInfo ){
final int s = fragInfo.startOffset;
- return makeFragment( fragInfo, getFragmentSource( buffer, index, values, s, fragInfo.endOffset ), s );
+ return makeFragment( fragInfo, getFragmentSource( buffer, index, values, s, fragInfo.endOffset ), s,
+ preTags, postTags, NULL_ENCODER );
}
- protected String makeFragment( StringBuilder buffer, int[] index, Field[] values, WeightedFragInfo fragInfo ){
+ protected String makeFragment( StringBuilder buffer, int[] index, Field[] values, WeightedFragInfo fragInfo,
+ String[] preTags, String[] postTags, Encoder encoder ){
final int s = fragInfo.startOffset;
- return makeFragment( fragInfo, getFragmentSource( buffer, index, values, s, fragInfo.endOffset ), s );
+ return makeFragment( fragInfo, getFragmentSource( buffer, index, values, s, fragInfo.endOffset ), s,
+ preTags, postTags, encoder );
}
- private String makeFragment( WeightedFragInfo fragInfo, String src, int s ){
+ private String makeFragment( WeightedFragInfo fragInfo, String src, int s,
+ String[] preTags, String[] postTags, Encoder encoder ){
StringBuilder fragment = new StringBuilder();
int srcIndex = 0;
for( SubInfo subInfo : fragInfo.subInfos ){
for( Toffs to : subInfo.termsOffsets ){
- fragment.append( src.substring( srcIndex, to.startOffset - s ) ).append( getPreTag( subInfo.seqnum ) )
- .append( src.substring( to.startOffset - s, to.endOffset - s ) ).append( getPostTag( subInfo.seqnum ) );
+ fragment
+ .append( encoder.encodeText( src.substring( srcIndex, to.startOffset - s ) ) )
+ .append( getPreTag( preTags, subInfo.seqnum ) )
+ .append( encoder.encodeText( src.substring( to.startOffset - s, to.endOffset - s ) ) )
+ .append( getPostTag( postTags, subInfo.seqnum ) );
srcIndex = to.endOffset - s;
}
}
- fragment.append( src.substring( srcIndex ) );
+ fragment.append( encoder.encodeText( src.substring( srcIndex ) ) );
return fragment.toString();
}
@@ -146,13 +172,21 @@
int eo = buffer.length() < endOffset ? buffer.length() : endOffset;
return buffer.substring( startOffset, eo );
}
+
+ protected String getPreTag( int num ){
+ return getPreTag( preTags, num );
+ }
- protected String getPreTag( int num ){
+ protected String getPostTag( int num ){
+ return getPostTag( postTags, num );
+ }
+
+ protected String getPreTag( String[] preTags, int num ){
int n = num % preTags.length;
return preTags[n];
}
- protected String getPostTag( int num ){
+ protected String getPostTag( String[] postTags, int num ){
int n = num % postTags.length;
return postTags[n];
}
Index: contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FragmentsBuilder.java
===================================================================
--- contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FragmentsBuilder.java (revision 982424)
+++ contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FragmentsBuilder.java (working copy)
@@ -20,10 +20,12 @@
import java.io.IOException;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.search.highlight.Encoder;
/**
- * FragmentsBuilder is an interface for fragments (snippets) builder classes.
- * A FragmentsBuilder class can be plugged in to Highlighter.
+ * {@link org.apache.lucene.search.vectorhighlight.FragmentsBuilder} is an interface for fragments (snippets) builder classes.
+ * A {@link org.apache.lucene.search.vectorhighlight.FragmentsBuilder} class can be plugged in to
+ * {@link org.apache.lucene.search.vectorhighlight.FastVectorHighlighter}.
*/
public interface FragmentsBuilder {
@@ -54,4 +56,40 @@
*/
public String[] createFragments( IndexReader reader, int docId, String fieldName,
FieldFragList fieldFragList, int maxNumFragments ) throws IOException;
+
+ /**
+ * create a fragment.
+ *
+ * @param reader IndexReader of the index
+ * @param docId document id to be highlighted
+ * @param fieldName field of the document to be highlighted
+ * @param fieldFragList FieldFragList object
+ * @param preTags pre-tags to be used to highlight terms
+ * @param postTags post-tags to be used to highlight terms
+ * @param encoder an encoder that generates encoded text
+ * @return a created fragment or null when no fragment created
+ * @throws IOException
+ */
+ public String createFragment( IndexReader reader, int docId, String fieldName,
+ FieldFragList fieldFragList, String[] preTags, String[] postTags,
+ Encoder encoder ) throws IOException;
+
+ /**
+ * create multiple fragments.
+ *
+ * @param reader IndexReader of the index
+ * @param docId document id to be highlighter
+ * @param fieldName field of the document to be highlighted
+ * @param fieldFragList FieldFragList object
+ * @param maxNumFragments maximum number of fragments
+ * @param preTags pre-tags to be used to highlight terms
+ * @param postTags post-tags to be used to highlight terms
+ * @param encoder an encoder that generates encoded text
+ * @return created fragments or null when no fragments created.
+ * size of the array can be less than maxNumFragments
+ * @throws IOException
+ */
+ public String[] createFragments( IndexReader reader, int docId, String fieldName,
+ FieldFragList fieldFragList, int maxNumFragments, String[] preTags, String[] postTags,
+ Encoder encoder ) throws IOException;
}