Index: lucene/CHANGES.txt
===================================================================
--- lucene/CHANGES.txt (revision 1395421)
+++ lucene/CHANGES.txt (working copy)
@@ -44,6 +44,11 @@
the suggester to ignore such variations. (Robert Muir, Sudarshan
Gaikaiwari, Mike McCandless)
+Bug Fixes
+
+* LUCENE-1822: BaseFragListBuilder hard-coded 6 char margin is too naive.
+ (Alex Vigdor, Arcadius Ahouansou, Koji Sekiguchi)
+
Optimizations
* LUCENE-4443: BlockPostingsFormat no longer writes unnecessary offsets
Index: lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/WeightedFragListBuilderTest.java
===================================================================
--- lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/WeightedFragListBuilderTest.java (revision 1395421)
+++ lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/WeightedFragListBuilderTest.java (working copy)
@@ -29,7 +29,7 @@
WeightedFragListBuilder wflb = new WeightedFragListBuilder();
FieldFragList ffl = wflb.createFieldFragList( fpl, 100 );
assertEquals( 1, ffl.getFragInfos().size() );
- assertEquals( "subInfos=(theboth((195,203)))/0.86791086(189,289)", ffl.getFragInfos().get( 0 ).toString() );
+ assertEquals( "subInfos=(theboth((195,203)))/0.86791086(149,249)", ffl.getFragInfos().get( 0 ).toString() );
}
}
Index: lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java
===================================================================
--- lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java (revision 1395421)
+++ lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java (working copy)
@@ -106,7 +106,8 @@
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
- assertEquals( "a b c d e", sfb.createFragment( reader, 0, F, ffl ) );
+ //TODO: Why blank space here? We should probably be trimming?
+ assertEquals( " a b c d e", sfb.createFragment( reader, 0, F, ffl ) );
}
public void test1PhraseLongMV() throws Exception {
@@ -118,7 +119,7 @@
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
- assertEquals( "The most search engines use only one of these methods. Even the search engines that says they can use the",
+ assertEquals( "customization: The most search engines use only one of these methods. Even the search engines that says they can",
sfb.createFragment( reader, 0, F, ffl ) );
}
@@ -131,7 +132,7 @@
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
- assertEquals( "processing speed, the", sfb.createFragment( reader, 0, F, ffl ) );
+ assertEquals( "additional hardware. \nWhen you talk about processing speed, the", sfb.createFragment( reader, 0, F, ffl ) );
}
public void testUnstoredField() throws Exception {
@@ -209,7 +210,7 @@
String[] result = sfb.createFragments(reader, 0, F, ffl, 3);
assertEquals(2, result.length);
assertEquals("some text to highlight", result[0]);
- assertEquals("other text", result[1]);
+ assertEquals("highlight other text", result[1]);
fq = new FieldQuery( tq( "highlight" ), true, true );
stack = new FieldTermStack( reader, 0, F, fq );
Index: lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragListBuilderTest.java
===================================================================
--- lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragListBuilderTest.java (revision 1395421)
+++ lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragListBuilderTest.java (working copy)
@@ -42,7 +42,7 @@
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
FieldFragList ffl = sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "abcdefghijklmnopqrs")), "abcdefghijklmnopqrs" ), sflb.minFragCharSize );
assertEquals( 1, ffl.getFragInfos().size() );
- assertEquals( "subInfos=(abcdefghijklmnopqrs((0,19)))/1.0(0,19)", ffl.getFragInfos().get( 0 ).toString() );
+ assertEquals( "subInfos=(abcdefghijklmnopqrs((0,19)))/1.0(0,18)", ffl.getFragInfos().get( 0 ).toString() );
}
public void testSmallerFragSizeThanPhraseQuery() throws Exception {
@@ -55,7 +55,7 @@
FieldFragList ffl = sflb.createFieldFragList( fpl(phraseQuery, "abcdefgh jklmnopqrs" ), sflb.minFragCharSize );
assertEquals( 1, ffl.getFragInfos().size() );
if (VERBOSE) System.out.println( ffl.getFragInfos().get( 0 ).toString() );
- assertEquals( "subInfos=(abcdefghjklmnopqrs((0,21)))/1.0(0,21)", ffl.getFragInfos().get( 0 ).toString() );
+ assertEquals( "subInfos=(abcdefghjklmnopqrs((0,21)))/1.0(1,19)", ffl.getFragInfos().get( 0 ).toString() );
}
public void test1TermIndex() throws Exception {
@@ -77,7 +77,7 @@
ffl = sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "a")), "b b b b a b b b b a" ), 20 );
assertEquals( 1, ffl.getFragInfos().size() );
- assertEquals( "subInfos=(a((8,9))a((18,19)))/2.0(2,22)", ffl.getFragInfos().get( 0 ).toString() );
+ assertEquals( "subInfos=(a((8,9))a((18,19)))/2.0(4,24)", ffl.getFragInfos().get( 0 ).toString() );
}
public void test2TermsIndex2Frags() throws Exception {
@@ -85,7 +85,7 @@
FieldFragList ffl = sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "a")), "a b b b b b b b b b b b b b a" ), 20 );
assertEquals( 2, ffl.getFragInfos().size() );
assertEquals( "subInfos=(a((0,1)))/1.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
- assertEquals( "subInfos=(a((28,29)))/1.0(22,42)", ffl.getFragInfos().get( 1 ).toString() );
+ assertEquals( "subInfos=(a((28,29)))/1.0(20,40)", ffl.getFragInfos().get( 1 ).toString() );
ffl = sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "a")), "a b b b b b b b b b b b b a" ), 20 );
assertEquals( 2, ffl.getFragInfos().size() );
@@ -164,7 +164,7 @@
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
assertEquals( 1, ffl.getFragInfos().size() );
- assertEquals( "subInfos=(d((9,10)))/1.0(3,103)", ffl.getFragInfos().get( 0 ).toString() );
+ assertEquals( "subInfos=(d((9,10)))/1.0(0,100)", ffl.getFragInfos().get( 0 ).toString() );
}
public void test1PhraseLongMV() throws Exception {
@@ -176,7 +176,7 @@
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
assertEquals( 1, ffl.getFragInfos().size() );
- assertEquals( "subInfos=(searchengines((102,116))searchengines((157,171)))/2.0(96,196)", ffl.getFragInfos().get( 0 ).toString() );
+ assertEquals( "subInfos=(searchengines((102,116))searchengines((157,171)))/2.0(87,187)", ffl.getFragInfos().get( 0 ).toString() );
}
public void test1PhraseLongMVB() throws Exception {
@@ -188,6 +188,6 @@
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
assertEquals( 1, ffl.getFragInfos().size() );
- assertEquals( "subInfos=(sppeeeed((88,93)))/1.0(82,182)", ffl.getFragInfos().get( 0 ).toString() );
+ assertEquals( "subInfos=(sppeeeed((88,93)))/1.0(41,141)", ffl.getFragInfos().get( 0 ).toString() );
}
}
Index: lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragListBuilder.java
===================================================================
--- lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragListBuilder.java (revision 1395421)
+++ lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragListBuilder.java (working copy)
@@ -69,13 +69,14 @@
wpil.clear();
wpil.add( phraseInfo );
+ int firstOffset = phraseInfo.getStartOffset();
int st = phraseInfo.getStartOffset() - margin < startOffset ?
startOffset : phraseInfo.getStartOffset() - margin;
int en = st + fragCharSize;
if( phraseInfo.getEndOffset() > en )
en = phraseInfo.getEndOffset();
- startOffset = en;
+ int lastEndOffset = phraseInfo.getEndOffset();
while( true ){
if( ite.hasNext() ){
phraseInfo = ite.next();
@@ -84,11 +85,22 @@
}
else
break;
- if( phraseInfo.getEndOffset() <= en )
+ if( phraseInfo.getEndOffset() <= en ){
wpil.add( phraseInfo );
+ lastEndOffset = phraseInfo.getEndOffset();
+ }
else
break;
}
+ int matchLen = lastEndOffset - firstOffset;
+ //now recalculate the start and end position to "center" the result
+ int newMargin = (fragCharSize-matchLen)/2;
+ st = firstOffset - newMargin;
+ if(st