Index: lucene/contrib/highlighter/src/java/org/apache/lucene/search/poshighlight/PosHighlighter.java
===================================================================
--- lucene/contrib/highlighter/src/java/org/apache/lucene/search/poshighlight/PosHighlighter.java (revision 1144554)
+++ lucene/contrib/highlighter/src/java/org/apache/lucene/search/poshighlight/PosHighlighter.java (working copy)
@@ -3,17 +3,15 @@
import java.io.IOException;
import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.TermVectorMapper;
-import org.apache.lucene.index.TermVectorOffsetInfo;
import org.apache.lucene.search.highlight.DefaultEncoder;
import org.apache.lucene.search.highlight.Encoder;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.TextFragment;
import org.apache.lucene.search.positions.PositionIntervalIterator.PositionInterval;
-import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.BytesRef;
+/*
+ */
public class PosHighlighter {
private Formatter formatter;
private Encoder encoder;
@@ -67,14 +65,16 @@
int matchEnd = pom.getEndOffset(pos.end);
int fragStart = Math.max(0, matchStart - (fragSize - (matchEnd-matchStart)) / 2);
int fragEnd = Math.min(fragStart+fragSize, text.length());
-
+ // FIXME - non-initial fragments fail to highlight matches occurring between fragStart
+ // and matchStart
+ // FIXME - also, fragments should not overlap
for (;;) {
// Build up a single fragment, possibly including multiple positions
if (matchStart > fragStart)
buf.append (text, fragStart, matchStart);
- buf.append ("<B>"); // TODO - parameterize
+ buf.append ("<B>"); // TODO - parameterize
buf.append (text, matchStart, matchEnd);
- buf.append ("</B>");
+ buf.append ("</B>");
if (fragEnd <= matchEnd) {
break;
}
@@ -120,47 +120,4 @@
// TODO - get maxNumFragments top fragments by score
return null;
}
-
- class PositionOffsetMapper extends TermVectorMapper {
- private int maxPos = 0;
- private static final int BUF_SIZE = 128;
- int startOffset[] = new int[BUF_SIZE], endOffset[] = new int[BUF_SIZE];
-
- public void setExpectations(String field, int numTerms,
- boolean storeOffsets, boolean storePositions) {
- }
-
- public void map(BytesRef term, int frequency,
- TermVectorOffsetInfo[] offsets, int[] positions)
- {
- for (int i = 0; i < positions.length; i++) {
- int pos = positions[i];
- if (pos >= startOffset.length) {
- grow (pos + BUF_SIZE);
- maxPos = pos;
- } else if (pos > maxPos) {
- maxPos = pos;
- }
- startOffset[pos] = offsets[i].getStartOffset();
- endOffset[pos] = offsets[i].getEndOffset();
- }
- }
-
- private void grow (int size) {
- startOffset = ArrayUtil.grow (startOffset, size);
- endOffset = ArrayUtil.grow (endOffset, size);
- }
-
- public int getStartOffset(int pos) {
- return startOffset[pos];
- }
-
- public int getEndOffset(int pos) {
- return endOffset[pos];
- }
-
- public int getMaxPosition() {
- return maxPos;
- }
- }
}
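
For reference, the fragment window computed above centers the window on the match and clips it to the document bounds. A small worked sketch with hypothetical values (not taken from the patch):

    int fragSize = 100, matchStart = 250, matchEnd = 260;   // a 10-char match at offsets [250,260)
    int textLength = 1000;                                   // assumed length of the stored field text
    int fragStart = Math.max(0, matchStart - (fragSize - (matchEnd - matchStart)) / 2);
    int fragEnd = Math.min(fragStart + fragSize, textLength);
    // fragStart == 205, fragEnd == 305: the match sits roughly in the middle of a 100-char window
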
Index: lucene/contrib/highlighter/src/java/org/apache/lucene/search/poshighlight/PosTokenStream.java
===================================================================
--- lucene/contrib/highlighter/src/java/org/apache/lucene/search/poshighlight/PosTokenStream.java (revision 0)
+++ lucene/contrib/highlighter/src/java/org/apache/lucene/search/poshighlight/PosTokenStream.java (revision 0)
@@ -0,0 +1,58 @@
+package org.apache.lucene.search.poshighlight;
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.search.positions.PositionIntervalIterator;
+import org.apache.lucene.search.positions.PositionIntervalIterator.PositionInterval;
+
+/**
+ * A TokenStream built from a String and predetermined PositionIntervals.
+ * One token is emitted per interval, covering that interval's character offsets. The intervals
+ * are assumed to be non-overlapping.
+ *
+ * Maybe this should be built using a PositionIntervalIterator instead?
+ */
+public class PosTokenStream extends TokenStream {
+
+ //this token stream generates three attributes:
+ // term, offset, and positionIncrement (type is currently unused)
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+ private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
+ //private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
+ private final String text;
+ private final PositionIntervalIterator positions;
+
+ // the current position interval
+ private PositionInterval pos = null;
+ // true if we are at the start of an interval, false if at the end
+ private boolean atBegin = false;
+ private final PositionOffsetMapper pom;
+
+ public PosTokenStream (String text, PositionIntervalIterator positions, PositionOffsetMapper pom) {
+ this.text = text;
+ this.positions = positions;
+ this.pom = pom;
+ }
+
+ @Override
+ public final boolean incrementToken() throws IOException {
+ pos = positions.next();
+ if (pos == null){
+ return false;
+ }
+ int b, e;
+ b = pom.getStartOffset(pos.begin);
+ e = pom.getEndOffset(pos.end);
+ termAtt.setEmpty().append(text, b, e); // clear the previous term so text does not accumulate across tokens
+ offsetAtt.setOffset(b, e);
+ posIncrAtt.setPositionIncrement(1);
+ atBegin = !atBegin;
+ return true;
+ }
+
+}
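
A minimal sketch of consuming this stream directly; text, doc (a ScorePosDoc) and pom are assumed to have been produced by the collector and term-vector mapper used elsewhere in this patch:

    PosTokenStream ts = new PosTokenStream(text,
        new PositionIntervalArrayIterator(doc.positions, doc.posCount), pom);
    OffsetAttribute off = ts.getAttribute(OffsetAttribute.class);
    while (ts.incrementToken()) {
      // one token is produced per match interval, spanning the interval's character offsets
      System.out.println(text.substring(off.startOffset(), off.endOffset()));
    }
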
Index: lucene/contrib/highlighter/src/java/org/apache/lucene/search/poshighlight/PositionIntervalArrayIterator.java
===================================================================
--- lucene/contrib/highlighter/src/java/org/apache/lucene/search/poshighlight/PositionIntervalArrayIterator.java (revision 0)
+++ lucene/contrib/highlighter/src/java/org/apache/lucene/search/poshighlight/PositionIntervalArrayIterator.java (revision 0)
@@ -0,0 +1,29 @@
+package org.apache.lucene.search.poshighlight;
+
+import org.apache.lucene.search.positions.PositionIntervalIterator;
+
+public class PositionIntervalArrayIterator extends PositionIntervalIterator {
+
+ private int next = 0;
+ private int count;
+ private PositionInterval[] positions;
+
+ public PositionIntervalArrayIterator (PositionInterval[] positions, int count) {
+ super(null);
+ this.positions = positions;
+ this.count = count;
+ }
+
+ @Override
+ public PositionInterval next() {
+ if (next >= count)
+ return null;
+ return positions[next++];
+ }
+
+ @Override
+ public PositionIntervalIterator[] subs(boolean inOrder) {
+ return null;
+ }
+
+}
\ No newline at end of file
Index: lucene/contrib/highlighter/src/java/org/apache/lucene/search/poshighlight/PositionOffsetMapper.java
===================================================================
--- lucene/contrib/highlighter/src/java/org/apache/lucene/search/poshighlight/PositionOffsetMapper.java (revision 0)
+++ lucene/contrib/highlighter/src/java/org/apache/lucene/search/poshighlight/PositionOffsetMapper.java (revision 0)
@@ -0,0 +1,49 @@
+package org.apache.lucene.search.poshighlight;
+
+import org.apache.lucene.index.TermVectorMapper;
+import org.apache.lucene.index.TermVectorOffsetInfo;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesRef;
+
+public class PositionOffsetMapper extends TermVectorMapper {
+ private int maxPos = 0;
+ private static final int BUF_SIZE = 128;
+ int startOffset[] = new int[BUF_SIZE], endOffset[] = new int[BUF_SIZE];
+
+ public void setExpectations(String field, int numTerms,
+ boolean storeOffsets, boolean storePositions) {
+ }
+
+ public void map(BytesRef term, int frequency,
+ TermVectorOffsetInfo[] offsets, int[] positions)
+ {
+ for (int i = 0; i < positions.length; i++) {
+ int pos = positions[i];
+ if (pos >= startOffset.length) {
+ grow (pos + BUF_SIZE);
+ maxPos = pos;
+ } else if (pos > maxPos) {
+ maxPos = pos;
+ }
+ startOffset[pos] = offsets[i].getStartOffset();
+ endOffset[pos] = offsets[i].getEndOffset();
+ }
+ }
+
+ private void grow (int size) {
+ startOffset = ArrayUtil.grow (startOffset, size);
+ endOffset = ArrayUtil.grow (endOffset, size);
+ }
+
+ public int getStartOffset(int pos) {
+ return startOffset[pos];
+ }
+
+ public int getEndOffset(int pos) {
+ return endOffset[pos];
+ }
+
+ public int getMaxPosition() {
+ return maxPos;
+ }
+}
\ No newline at end of file
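
A minimal usage sketch (reader, docId, the field name, and a PositionInterval pos are assumed to be in scope). The mapper is handed to the term-vector API, which calls map() once per term; afterwards query positions can be translated to character offsets. This requires the field to be indexed with term vectors that store both positions and offsets:

    PositionOffsetMapper pom = new PositionOffsetMapper();
    reader.getTermFreqVector(docId, field, pom);        // fills the position -> offset tables
    int start = pom.getStartOffset(pos.begin);          // where the interval's first token starts
    int end = pom.getEndOffset(pos.end);                // where the interval's last token ends
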
Index: lucene/contrib/highlighter/src/java/org/apache/lucene/search/poshighlight/PositionTreeIterator.java
===================================================================
--- lucene/contrib/highlighter/src/java/org/apache/lucene/search/poshighlight/PositionTreeIterator.java (revision 1144554)
+++ lucene/contrib/highlighter/src/java/org/apache/lucene/search/poshighlight/PositionTreeIterator.java (working copy)
@@ -34,7 +34,7 @@
return null;
Frame f = stack[curframe];
if (f.subs == null) {
- pos = stack[curframe].positions.next();
+ pos = f.positions.next();
if (pos != null)
return pos;
}
Index: lucene/contrib/highlighter/src/java/org/apache/lucene/search/poshighlight/ScorePosDoc.java
===================================================================
--- lucene/contrib/highlighter/src/java/org/apache/lucene/search/poshighlight/ScorePosDoc.java (revision 1144554)
+++ lucene/contrib/highlighter/src/java/org/apache/lucene/search/poshighlight/ScorePosDoc.java (working copy)
@@ -29,6 +29,7 @@
if (posCount >= positions.length) {
PositionInterval temp[] = new PositionInterval[positions.length * 2];
System.arraycopy(positions, 0, temp, 0, positions.length);
+ positions = temp;
}
positions[posCount++] = (PositionInterval) pos.clone();
}
Index: lucene/contrib/highlighter/src/test/org/apache/lucene/search/poshighlight/PosHighlighterTest.java
===================================================================
--- lucene/contrib/highlighter/src/test/org/apache/lucene/search/poshighlight/PosHighlighterTest.java (revision 1144554)
+++ lucene/contrib/highlighter/src/test/org/apache/lucene/search/poshighlight/PosHighlighterTest.java (working copy)
@@ -1,14 +1,11 @@
package org.apache.lucene.search.poshighlight;
-import java.io.BufferedReader;
-import java.io.FileInputStream;
import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
@@ -21,6 +18,7 @@
import org.apache.lucene.index.codecs.CoreCodecProvider;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MultiTermQuery;
@@ -28,28 +26,22 @@
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.WildcardQuery;
-import org.apache.lucene.search.BooleanClause.Occur;
-import org.apache.lucene.search.poshighlight.PosCollector;
-import org.apache.lucene.search.poshighlight.PosHighlighter;
+import org.apache.lucene.search.highlight.Highlighter;
+import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
+import org.apache.lucene.search.highlight.SimpleFragmenter;
+import org.apache.lucene.search.highlight.TextFragment;
import org.apache.lucene.search.positions.OrderedConjunctionPositionIterator;
import org.apache.lucene.search.positions.PositionIntervalIterator;
+import org.apache.lucene.search.positions.PositionIntervalIterator.PositionIntervalFilter;
import org.apache.lucene.search.positions.WithinPositionIterator;
-import org.apache.lucene.search.positions.PositionIntervalIterator.PositionIntervalFilter;
import org.apache.lucene.search.spans.MockSpanQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.LuceneTestCase;
-import org.junit.Ignore;
/**
- * Notes: to fully implement, we need:
- * 1) ability to walk the individual terms that matched, possibly in a hierarchical way
- * if we want to implement really clever highlighting?
- * 2) some Collector api like the one I made up, and support in Searcher
- * 3) All (or more) queries implemented
- *
- * For hl perf testing we could test term queries only using the current impl
- * @author sokolov
- *
+ * TODO:
+ * Phrase and Span Queries
+ * positions callback API
*/
public class PosHighlighterTest extends LuceneTestCase {
@@ -57,8 +49,12 @@
protected Analyzer analyzer;
protected QueryParser parser;
protected Directory dir;
- protected IndexSearcher searcher;
+ protected IndexSearcher searcher;
+ private static final String PORRIDGE_VERSE =
+ "Pease porridge hot! Pease porridge cold! Pease porridge in the pot nine days old! Some like it hot, some"
+ + " like it cold, Some like it in the pot nine days old! Pease porridge hot! Pease porridge cold!";
+
@Override
public void setUp() throws Exception {
super.setUp();
@@ -99,28 +95,75 @@
searcher = new IndexSearcher( dir, true );
}
- private String[] doSearch(Query q) throws IOException {
+ private String[] doSearch(Query q) throws IOException, InvalidTokenOffsetsException {
return doSearch(q, 100);
}
- private String[] doSearch(Query q, int maxFragSize) throws IOException {
- PosHighlighter ph = new PosHighlighter();
+ private class ConstantScorer implements org.apache.lucene.search.highlight.Scorer {
+
+ @Override
+ public TokenStream init(TokenStream tokenStream) throws IOException {
+ return tokenStream;
+ }
+
+ @Override
+ public void startFragment(TextFragment newFragment) {
+ }
+
+ @Override
+ public float getTokenScore() {
+ return 1;
+ }
+
+ @Override
+ public float getFragmentScore() {
+ return 1;
+ }
+ }
+
+ private String[] doSearch(Query q, int maxFragSize) throws IOException, InvalidTokenOffsetsException {
+ return doSearch (q, maxFragSize, 0);
+ }
+
+ private String[] doSearch(Query q, int maxFragSize, int docIndex) throws IOException, InvalidTokenOffsetsException {
+ //PosHighlighter ph = new PosHighlighter();
+ // ConstantScorer is a fragment Scorer, not a search result (document) Scorer
+ Highlighter highlighter = new Highlighter (new ConstantScorer());
+ highlighter.setTextFragmenter(new SimpleFragmenter(maxFragSize));
PosCollector collector = new PosCollector (10);
+ if (q instanceof MultiTermQuery) {
+ ((MultiTermQuery)q).setRewriteMethod (MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE);
+ q = q.rewrite(searcher.getIndexReader());
+ }
searcher.search(q, collector);
- return ph.getFirstFragments(collector.docs[0], searcher.getIndexReader(), F, true, 10, maxFragSize);
+ //return ph.getFirstFragments(collector.docs[0], searcher.getIndexReader(), F, true, 10, maxFragSize);
+ ScorePosDoc doc = collector.docs[docIndex];
+ if (doc == null)
+ return null;
+ String text = searcher.getIndexReader().document(doc.doc).getFieldable(F).stringValue();
+ PositionOffsetMapper pom = new PositionOffsetMapper ();
+ // FIXME: test error cases for non-stored fields and for fields with no term vectors
+ searcher.getIndexReader().getTermFreqVector(doc.doc, F, pom);
+ TextFragment[] fragTexts = highlighter.getBestTextFragments(new PosTokenStream
+ (text, new PositionIntervalArrayIterator(doc.positions, doc.posCount), pom),
+ text, false, 10);
+ String[] frags = new String[fragTexts.length];
+ for (int i = 0; i < frags.length; i++)
+ frags[i] = fragTexts[i].toString();
+ return frags;
}
public void testTerm () throws Exception {
insertDocs(analyzer, "This is a test");
String frags[] = doSearch (new TermQuery(new Term(F, "test")));
- assertEquals ("This is a test", frags[0]);
+ assertEquals ("This is a test", frags[0]);
}
public void testSeveralSnippets () throws Exception {
String input = "this is some long text. It has the word long in many places. In fact, it has long on some different fragments. " +
"Let us see what happens to long in this case.";
- String gold = "this is some long text. It has the word long in many places. In fact, it has long on some different fragments. " +
- "Let us see what happens to long in this case.";
+ String gold = "this is some long text. It has the word long in many places. In fact, it has long on some different fragments. " +
+ "Let us see what happens to long in this case.";
insertDocs(analyzer, input);
String frags[] = doSearch (new TermQuery(new Term(F, "long")), input.length());
assertEquals (gold, frags[0]);
@@ -132,7 +175,7 @@
bq.add(new BooleanClause (new TermQuery(new Term(F, "This")), Occur.MUST));
bq.add(new BooleanClause (new TermQuery(new Term(F, "test")), Occur.MUST));
String frags[] = doSearch (bq);
- assertEquals ("This is a test", frags[0]);
+ assertEquals ("This is a test", frags[0]);
}
public void testBooleanAndOtherOrder () throws Exception {
@@ -141,33 +184,58 @@
bq.add(new BooleanClause (new TermQuery(new Term(F, "test")), Occur.MUST));
bq.add(new BooleanClause (new TermQuery(new Term(F, "This")), Occur.MUST));
String frags[] = doSearch (bq);
- assertEquals ("This is a test", frags[0]);
+ assertEquals ("This is a test", frags[0]);
}
-
+
public void testBooleanOr () throws Exception {
- // OR queries not implemented yet...
insertDocs(analyzer, "This is a test");
BooleanQuery bq = new BooleanQuery();
bq.add(new BooleanClause (new TermQuery(new Term(F, "test")), Occur.SHOULD));
bq.add(new BooleanClause (new TermQuery(new Term(F, "This")), Occur.SHOULD));
String frags[] = doSearch (bq);
- assertEquals ("This is a test", frags[0]);
+ assertEquals ("This is a test", frags[0]);
}
- @Ignore("not supproted yet")
+ public void testBooleanNrShouldMatch () throws Exception {
+ insertDocs(analyzer, "a b c d e f g h i");
+ BooleanQuery bq = new BooleanQuery();
+ bq.add(new BooleanClause (new TermQuery(new Term(F, "a")), Occur.SHOULD));
+ bq.add(new BooleanClause (new TermQuery(new Term(F, "b")), Occur.SHOULD));
+ bq.add(new BooleanClause (new TermQuery(new Term(F, "no")), Occur.SHOULD));
+
+ // This generates a ConjunctionSumScorer
+ bq.setMinimumNumberShouldMatch(2);
+ String frags[] = doSearch (bq);
+ assertEquals ("a b c d e f g h i", frags[0]);
+
+ // This generates no scorer
+ bq.setMinimumNumberShouldMatch(3);
+ frags = doSearch (bq);
+ assertNull (frags);
+
+ // This generates a DisjunctionSumScorer
+ bq.setMinimumNumberShouldMatch(2);
+ bq.add(new BooleanClause (new TermQuery(new Term(F, "c")), Occur.SHOULD));
+ frags = doSearch (bq);
+ assertEquals ("a b c d e f g h i", frags[0]);
+ }
+
+ /*
+ * Failing ... MockSpanQuery scorer needs positions()?
+ */
public void testPhrase() throws Exception {
insertDocs(analyzer, "This is a test");
BooleanQuery bq = new BooleanQuery();
bq.add(new BooleanClause (new TermQuery(new Term(F, "is")), Occur.MUST));
bq.add(new BooleanClause (new TermQuery(new Term(F, "a")), Occur.MUST));
- MockSpanQuery msq = new MockSpanQuery(bq, false, F, new Filter(1));
+ MockSpanQuery msq = new MockSpanQuery(bq, false, F, new WithinPositionFilter(1));
String frags[] = doSearch (msq);
- assertEquals ("This is a test", frags[0]);
+ assertEquals ("This is a test", frags[0]);
}
- public static class Filter implements PositionIntervalFilter {
+ public static class WithinPositionFilter implements PositionIntervalFilter {
private int slop;
- public Filter(int slop) {
+ public WithinPositionFilter(int slop) {
this.slop = slop;
}
@Override
@@ -175,7 +243,10 @@
return new WithinPositionIterator(slop, new OrderedConjunctionPositionIterator(iter));
}
}
- @Ignore("not supproted yet")
+
+ /*
+ * Failing ... PhraseQuery scorer needs positions()?
+ */
public void testPhraseOriginal() throws Exception {
insertDocs(analyzer, "This is a test");
PhraseQuery pq = new PhraseQuery();
@@ -183,44 +254,45 @@
pq.add(new Term(F, "test"));
String frags[] = doSearch (pq);
//searcher.search(new MockSpanQuery(pq, collector.needsPayloads(), F, null), collector);
- assertEquals ("This is a test", frags[0]);
+ assertEquals ("This is a test", frags[0]);
}
public void testWildcard () throws Exception {
insertDocs(analyzer, "This is a test");
- WildcardQuery wildcardQuery = new WildcardQuery(new Term(F, "t*t"));
- // TODO enable positions in constant scorer
- wildcardQuery.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
- String frags[] = doSearch(wildcardQuery);
- assertEquals ("This is a test", frags[0]);
+ String frags[] = doSearch (new WildcardQuery(new Term(F, "t*t")));
+ assertEquals ("This is a test", frags[0]);
}
-//
-// @Ignore("file epistolary-novel.xml does not exist")
-// public void testLargerDocument() throws Exception {
-// InputStream in = new FileInputStream ("epistolary-novel.xml");
-// insertDocs(analyzer, IOUtils.toString(in));
-// in.close();
-// BooleanQuery bq = new BooleanQuery();
-// bq.add(new BooleanClause (new TermQuery(new Term(F, "unknown")), Occur.MUST));
-// bq.add(new BooleanClause (new TermQuery(new Term(F, "artist")), Occur.MUST));
-// String frags[] = doSearch (bq, 50);
-// assertEquals ("is a narration by an unknown observer.\n*[[Jean Web", frags[0]);
-// assertEquals ("fin and Sabine]]'' by artist [[Nick Bantock]] is a", frags[1]);
-// }
-// @Ignore("file epistolary-novel.xml does not exist")
-// public void testMultipleDocuments() throws Exception {
-// InputStream in = new FileInputStream ("epistolary-novel.xml");
-// insertDocs(analyzer,
-// "This document has no matches",
-// IOUtils.toString(in),
-// "This document has an unknown artist match");
-// BooleanQuery bq = new BooleanQuery();
-// bq.add(new BooleanClause (new TermQuery(new Term(F, "unknown")), Occur.MUST));
-// bq.add(new BooleanClause (new TermQuery(new Term(F, "artist")), Occur.MUST));
-// String frags[] = doSearch (bq, 50);
-// assertEquals ("is a narration by an unknown observer.\n*[[Jean Web", frags[0]);
-// assertEquals ("fin and Sabine]]'' by artist [[Nick Bantock]] is a", frags[1]);
-// }
+ public void testMultipleDocumentsAnd() throws Exception {
+ insertDocs(analyzer,
+ "This document has no matches",
+ PORRIDGE_VERSE,
+ "This document has some Pease porridge in it");
+ BooleanQuery bq = new BooleanQuery();
+ bq.add(new BooleanClause (new TermQuery(new Term(F, "Pease")), Occur.MUST));
+ bq.add(new BooleanClause (new TermQuery(new Term(F, "porridge")), Occur.MUST));
+ String frags[] = doSearch (bq, 50, 0);
+ assertEquals ("Pease porridge hot! Pease porridge cold! Pease", frags[0]);
+ frags = doSearch (bq, 50, 1);
+ assertEquals ("This document has some Pease porridge in it", frags[0]);
+ }
+
+ /*
+ * Failing: need positions callback API since DisjunctionSumScorer consumes all of a doc's
+ * positions before passing the doc to the collector.
+ */
+ public void testMultipleDocumentsOr() throws Exception {
+ insertDocs(analyzer,
+ "This document has no matches",
+ PORRIDGE_VERSE,
+ "This document has some Pease porridge in it");
+ BooleanQuery bq = new BooleanQuery();
+ bq.add(new BooleanClause (new TermQuery(new Term(F, "Pease")), Occur.SHOULD));
+ bq.add(new BooleanClause (new TermQuery(new Term(F, "porridge")), Occur.SHOULD));
+ String frags[] = doSearch (bq, 50, 0);
+ assertEquals ("Pease porridge hot! Pease porridge cold! Pease", frags[0]);
+ frags = doSearch (bq, 50, 1);
+ assertEquals ("This document has some Pease porridge in it", frags[0]);
+ }
-}
\ No newline at end of file
+}
Index: lucene/src/java/org/apache/lucene/search/ConstantScoreQuery.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/ConstantScoreQuery.java (revision 1144554)
+++ lucene/src/java/org/apache/lucene/search/ConstantScoreQuery.java (working copy)
@@ -20,6 +20,7 @@
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.Term;
+import org.apache.lucene.search.positions.PositionIntervalIterator;
import org.apache.lucene.util.ToStringUtils;
import java.io.IOException;
@@ -246,6 +247,15 @@
return super.score(collector, max, firstDocID);
}
}
+
+ @Override
+ public PositionIntervalIterator positions() throws IOException {
+ if (docIdSetIterator instanceof Scorer) {
+ return ((Scorer) docIdSetIterator).positions();
+ } else {
+ return super.positions();
+ }
+ }
}
@Override
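
This override matters for multi-term queries: the test helper earlier in this patch rewrites a WildcardQuery with CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE, and the resulting constant-score scorer can now forward positions() to the wrapped scorer instead of falling back to the default. A minimal sketch (searcher and field name F assumed):

    WildcardQuery wq = new WildcardQuery(new Term(F, "t*t"));
    wq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE);
    Query rewritten = wq.rewrite(searcher.getIndexReader());
    // the rewritten query's scorer delegates positions() to the underlying boolean scorer
    searcher.search(rewritten, new PosCollector(10));
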
Index: lucene/src/java/org/apache/lucene/search/DisjunctionSumScorer.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/DisjunctionSumScorer.java (revision 1144554)
+++ lucene/src/java/org/apache/lucene/search/DisjunctionSumScorer.java (working copy)
@@ -20,6 +20,7 @@
import java.util.List;
import java.io.IOException;
+import org.apache.lucene.search.positions.ConjunctionPositionIterator;
import org.apache.lucene.search.positions.DisjunctionPositionIterator;
import org.apache.lucene.search.positions.PositionIntervalIterator;
import org.apache.lucene.util.ScorerDocQueue;
@@ -241,7 +242,7 @@
@Override
public PositionIntervalIterator positions() throws IOException {
if (minimumNrMatchers > 1) {
- throw new IllegalStateException("positions not implemented for minimum matches > 1");
+ return new ConjunctionPositionIterator(this, subScorers.toArray(new Scorer[0]), minimumNrMatchers);
}
return new DisjunctionPositionIterator(this, subScorers.toArray(new Scorer[0]));
}
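
With this change a BooleanQuery using minimumNumberShouldMatch no longer throws from positions(); mirroring testBooleanNrShouldMatch above, a minimal sketch (searcher and field name F assumed):

    BooleanQuery bq = new BooleanQuery();
    bq.add(new TermQuery(new Term(F, "a")), Occur.SHOULD);
    bq.add(new TermQuery(new Term(F, "b")), Occur.SHOULD);
    bq.setMinimumNumberShouldMatch(2);
    // DisjunctionSumScorer now returns a ConjunctionPositionIterator over its sub-scorers
    // instead of throwing IllegalStateException
    searcher.search(bq, new PosCollector(10));
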
Index: lucene/src/java/org/apache/lucene/search/positions/ConjunctionPositionIterator.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/positions/ConjunctionPositionIterator.java (revision 1144554)
+++ lucene/src/java/org/apache/lucene/search/positions/ConjunctionPositionIterator.java (working copy)
@@ -22,7 +22,7 @@
/**
* ConjuctionPositionIterator based on minimal interval semantics for AND
- * operator
+ * operator.
*
*