diff --git a/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java b/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java index 3294de1..47c8533 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java @@ -18,6 +18,7 @@ package org.apache.lucene.search; */ import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.search.positions.BlockPositionIterator; import org.apache.lucene.search.positions.PositionIntervalIterator; import org.apache.lucene.search.similarities.Similarity; @@ -36,6 +37,7 @@ final class ExactPhraseScorer extends Scorer { boolean noDocs; private final static class ChunkState { + final TermQuery.TermDocsEnumFactory factory; final DocsAndPositionsEnum posEnum; final int offset; final boolean useAdvance; @@ -44,8 +46,9 @@ final class ExactPhraseScorer extends Scorer { int pos; int lastPos; - public ChunkState(DocsAndPositionsEnum posEnum, int offset, + public ChunkState(TermQuery.TermDocsEnumFactory factory, DocsAndPositionsEnum posEnum, int offset, boolean useAdvance) { + this.factory = factory; this.posEnum = posEnum; this.offset = offset; this.useAdvance = useAdvance; @@ -77,7 +80,7 @@ final class ExactPhraseScorer extends Scorer { // ANDing. 
This buys ~15% gain for phrases where // freq of rarest 2 terms is close: final boolean useAdvance = postings[i].docFreq > 5 * postings[0].docFreq; - chunkStates[i] = new ChunkState(postings[i].postings, + chunkStates[i] = new ChunkState(postings[i].factory, postings[i].postings, -postings[i].position, useAdvance); if (i > 0 && postings[i].postings.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) { @@ -323,7 +326,10 @@ final class ExactPhraseScorer extends Scorer { @Override public PositionIntervalIterator positions(boolean needsPayloads, boolean needsOffsets) throws IOException { - // nocommit implement this - throw new UnsupportedOperationException(); + TermScorer.TermPositions[] posIters = new TermScorer.TermPositions[chunkStates.length]; + for (int i = 0; i < chunkStates.length; i++) { + posIters[i] = new TermScorer.TermPositions(this, chunkStates[i].factory.docsAndPositionsEnum(needsOffsets), needsPayloads); + } + return new BlockPositionIterator(this, posIters); } } diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java b/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java index e008c19..48eb368 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java @@ -17,26 +17,14 @@ package org.apache.lucene.search; * limitations under the License. 
*/ -import java.io.IOException; -import java.util.*; - -import org.apache.lucene.index.AtomicReaderContext; -import org.apache.lucene.index.DocsAndPositionsEnum; -import org.apache.lucene.index.AtomicReader; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.IndexReaderContext; -import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermState; -import org.apache.lucene.index.Terms; -import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.search.similarities.Similarity.SloppySimScorer; +import org.apache.lucene.index.*; import org.apache.lucene.search.similarities.Similarity; -import org.apache.lucene.util.ArrayUtil; -import org.apache.lucene.util.Bits; -import org.apache.lucene.util.BytesRef; +import org.apache.lucene.search.similarities.Similarity.SloppySimScorer; +import org.apache.lucene.util.*; import org.apache.lucene.util.PriorityQueue; -import org.apache.lucene.util.TermContext; -import org.apache.lucene.util.ToStringUtils; + +import java.io.IOException; +import java.util.*; /** * MultiPhraseQuery is a generalized version of PhraseQuery, with an added @@ -236,8 +224,8 @@ public class MultiPhraseQuery extends Query { docFreq = termsEnum.docFreq(); } - - postingsFreqs[pos] = new PhraseQuery.PostingsAndFreq(postingsEnum, docFreq, positions.get(pos).intValue(), terms); + TermQuery.TermDocsEnumFactory factory = new TermQuery.TermDocsEnumFactory(termsEnum, postingsEnum, postingsEnum, acceptDocs); + postingsFreqs[pos] = new PhraseQuery.PostingsAndFreq(factory, docFreq, positions.get(pos).intValue(), terms); } // sort by increasing docFreq order diff --git a/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java b/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java index f96b23a..d295dd6 100644 --- a/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java @@ -17,28 +17,16 @@ package org.apache.lucene.search; 
* limitations under the License. */ +import org.apache.lucene.index.*; +import org.apache.lucene.search.similarities.Similarity; +import org.apache.lucene.search.similarities.Similarity.SloppySimScorer; +import org.apache.lucene.util.*; + import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Set; -import org.apache.lucene.index.AtomicReaderContext; -import org.apache.lucene.index.DocsAndPositionsEnum; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.AtomicReader; -import org.apache.lucene.index.IndexReaderContext; -import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermState; -import org.apache.lucene.index.Terms; -import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.search.similarities.Similarity.SloppySimScorer; -import org.apache.lucene.search.similarities.Similarity; -import org.apache.lucene.util.ArrayUtil; -import org.apache.lucene.util.Bits; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.TermContext; -import org.apache.lucene.util.ToStringUtils; - /** A Query that matches documents containing a particular sequence of terms. * A PhraseQuery is built by QueryParser for input like "new york". * @@ -135,14 +123,16 @@ public class PhraseQuery extends Query { } static class PostingsAndFreq implements Comparable { + final TermQuery.TermDocsEnumFactory factory; final DocsAndPositionsEnum postings; final int docFreq; final int position; final Term[] terms; final int nTerms; // for faster comparisons - public PostingsAndFreq(DocsAndPositionsEnum postings, int docFreq, int position, Term... terms) { - this.postings = postings; + public PostingsAndFreq(TermQuery.TermDocsEnumFactory factory, int docFreq, int position, Term... terms) throws IOException { + this.factory = factory; + this.postings = factory.docsAndPositionsEnum(false); this.docFreq = docFreq; this.position = position; nTerms = terms==null ? 
0 : terms.length; @@ -253,8 +243,9 @@ public class PhraseQuery extends Query { return null; } - // Reuse single TermsEnum below: - final TermsEnum te = fieldTerms.iterator(null); + // Can't reuse the TermsEnum, because we need to preserve a separate one for each + // Term to later get the positions information. + //final TermsEnum te = fieldTerms.iterator(null); for (int i = 0; i < terms.size(); i++) { final Term t = terms.get(i); @@ -263,6 +254,7 @@ public class PhraseQuery extends Query { assert termNotInReader(reader, field, t.bytes()): "no termstate found but term exists in reader"; return null; } + final TermsEnum te = fieldTerms.iterator(null); te.seekExact(t.bytes(), state); DocsAndPositionsEnum postingsEnum = te.docsAndPositions(liveDocs, null, false); @@ -273,7 +265,8 @@ public class PhraseQuery extends Query { // term does exist, but has no positions throw new IllegalStateException("field \"" + t.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + t.text() + ")"); } - postingsFreqs[i] = new PostingsAndFreq(postingsEnum, te.docFreq(), positions.get(i).intValue(), t); + TermQuery.TermDocsEnumFactory factory = new TermQuery.TermDocsEnumFactory(te, postingsEnum, postingsEnum, acceptDocs); + postingsFreqs[i] = new PostingsAndFreq(factory, te.docFreq(), positions.get(i).intValue(), t); } // sort by increasing docFreq order diff --git a/lucene/core/src/java/org/apache/lucene/search/positions/BlockPositionIterator.java b/lucene/core/src/java/org/apache/lucene/search/positions/BlockPositionIterator.java index 9f2c1d2..f51f28a 100644 --- a/lucene/core/src/java/org/apache/lucene/search/positions/BlockPositionIterator.java +++ b/lucene/core/src/java/org/apache/lucene/search/positions/BlockPositionIterator.java @@ -78,6 +78,19 @@ public final class BlockPositionIterator extends PositionIntervalIterator { this.gaps = gaps; } + public BlockPositionIterator(Scorer scorer, int[] gaps, PositionIntervalIterator[] iterators) { + super(scorer); + 
assert iterators.length > 1; + this.iterators = iterators; + intervals = new PositionInterval[iterators.length]; + lastIter = iterators.length - 1; + this.gaps = gaps; + } + + public BlockPositionIterator(Scorer scorer, PositionIntervalIterator[] iterators) { + this(scorer, defaultGaps(iterators.length), iterators); + } + @Override public PositionInterval next() throws IOException { if ((intervals[0] = iterators[0].next()) == null) { diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/poshighlight/PosHighlighterTest.java b/lucene/highlighter/src/test/org/apache/lucene/search/poshighlight/PosHighlighterTest.java index 6bd75da..8a6a119 100644 --- a/lucene/highlighter/src/test/org/apache/lucene/search/poshighlight/PosHighlighterTest.java +++ b/lucene/highlighter/src/test/org/apache/lucene/search/poshighlight/PosHighlighterTest.java @@ -31,7 +31,6 @@ import org.apache.lucene.search.positions.PositionIntervalIterator.PositionInter import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util._TestUtil; -import org.junit.Ignore; import java.io.IOException; @@ -262,7 +261,7 @@ public class PosHighlighterTest extends LuceneTestCase { /* * Failing ... PhraseQuery scorer needs positions()? */ - @Ignore + //@Ignore public void testPhraseOriginal() throws Exception { insertDocs(analyzer, "This is a test"); PhraseQuery pq = new PhraseQuery();