diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java
index 1360a7a..31c7b1d 100644
--- a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java
@@ -17,14 +17,7 @@ package org.apache.lucene.search;
  * limitations under the License.
  */
 
-import java.io.IOException;
-import java.util.*;
-
-import org.apache.lucene.index.AtomicReaderContext;
-import org.apache.lucene.index.DocsEnum;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.*;
 import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.search.ConjunctionTermScorer.DocsAndFreqs;
 import org.apache.lucene.search.TermQuery.TermDocsEnumFactory;
@@ -34,6 +27,12 @@ import org.apache.lucene.search.similarities.Similarity.ExactSimScorer;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.ToStringUtils;
 
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
+
 /** A Query that matches documents matching boolean combinations of other
   * queries, e.g. {@link TermQuery}s, {@link PhraseQuery}s or other
   * BooleanQuerys.
@@ -376,8 +375,8 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
         // and fallback to full match-only scorer:
         return createMatchOnlyConjunctionTermScorer(context, acceptDocs);
       }
-      TermDocsEnumFactory factory = new TermDocsEnumFactory(termsEnum, docsAndFreqsEnum, docsAndFreqsEnum, acceptDocs);
-      docsAndFreqs[i] = new DocsAndFreqs(termsEnum.docFreq(), docScorer, factory);
+      TermDocsEnumFactory factory = new TermDocsEnumFactory(termsEnum, acceptDocs);
+      docsAndFreqs[i] = new DocsAndFreqs(termsEnum.docFreq(), docScorer, docsAndFreqsEnum, factory);
     }
     return new ConjunctionTermScorer(this, disableCoord ? 1.0f : coord(
         docsAndFreqs.length, docsAndFreqs.length), docsAndFreqs);
@@ -394,8 +393,8 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
         return null;
       }
       final ExactSimScorer docScorer = weight.createDocScorer(context);
-      TermDocsEnumFactory factory = new TermDocsEnumFactory(termsEnum, termsEnum.docs(acceptDocs, null, false), null, acceptDocs);
-      docsAndFreqs[i] = new DocsAndFreqs(termsEnum.docFreq(), docScorer, factory);
+      TermDocsEnumFactory factory = new TermDocsEnumFactory(termsEnum, acceptDocs);
+      docsAndFreqs[i] = new DocsAndFreqs(termsEnum.docFreq(), docScorer, termsEnum.docs(acceptDocs, null, false), factory);
     }
     return new MatchOnlyConjunctionTermScorer(this, disableCoord ? 1.0f : coord(
diff --git a/lucene/core/src/java/org/apache/lucene/search/ConjunctionTermScorer.java b/lucene/core/src/java/org/apache/lucene/search/ConjunctionTermScorer.java
index dbf1c0e..655f7f2 100644
--- a/lucene/core/src/java/org/apache/lucene/search/ConjunctionTermScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/ConjunctionTermScorer.java
@@ -105,16 +105,16 @@ class ConjunctionTermScorer extends Scorer {
   }
 
   static final class DocsAndFreqs {
-    final DocsEnum docsAndFreqs;
+    //final DocsEnum docsAndFreqs;
     final DocsEnum docs;
     final int docFreq;
     final ExactSimScorer docScorer;
     int doc = -1;
     private final TermDocsEnumFactory factory;
 
-    DocsAndFreqs( int docFreq, ExactSimScorer docScorer, TermDocsEnumFactory factory) throws IOException {
-      this.docsAndFreqs = factory.docsAndFreqsEnum();
-      this.docs = factory.docsEnum();
+    DocsAndFreqs( int docFreq, ExactSimScorer docScorer, DocsEnum docs, TermDocsEnumFactory factory) throws IOException {
+      //this.docsAndFreqs = factory.docsAndFreqsEnum();
+      this.docs = docs;
       this.docFreq = docFreq;
       this.docScorer = docScorer;
       this.factory = factory;
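Reviewer sketch (not part of the patch): with this refactor the factory no longer hands out the main iteration enum; the caller pulls the freq-capable DocsEnum itself and DocsAndFreqs simply stores whatever it is handed, keeping the factory around only for a later positions/offsets pass. A minimal sketch of the intended wiring for one conjunction clause, using only calls that appear in the hunks above; the surrounding docScorer/acceptDocs setup is assumed.

    // sketch: wire up one MUST clause the way createConjunctionTermScorer now does
    static DocsAndFreqs clauseFor(TermsEnum termsEnum, ExactSimScorer docScorer, Bits acceptDocs)
        throws IOException {
      DocsEnum docsAndFreqsEnum = termsEnum.docs(acceptDocs, null, true);   // caller owns the enum
      TermDocsEnumFactory factory = new TermDocsEnumFactory(termsEnum, acceptDocs);
      // the enum is passed in directly; the factory is only consulted again
      // when positions/offsets are requested via docsAndPositionsEnum(offsets)
      return new DocsAndFreqs(termsEnum.docFreq(), docScorer, docsAndFreqsEnum, factory);
    }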
diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java b/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java
index c7fecc8..8845cb0 100644
--- a/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java
@@ -206,7 +206,7 @@ public class MultiPhraseQuery extends Query {
           // None of the terms are in this reader
           return null;
         }
-        factory = null; // nocommit - what to do here
+        factory = new MultiTermDocsEnumFactory(liveDocs, context, terms, termContexts, termsEnum);
       } else {
         final Term term = terms[0];
         TermState termState = termContexts.get(term).get(context.ord);
@@ -223,8 +223,7 @@
           throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + term.text() + ")");
         }
-        docFreq = termsEnum.docFreq();
-        factory = new TermQuery.TermDocsEnumFactory(BytesRef.deepCopyOf(term.bytes()), termState, termsEnum, postingsEnum, postingsEnum, acceptDocs);
+        factory = new TermQuery.TermDocsEnumFactory(BytesRef.deepCopyOf(term.bytes()), termsEnum, acceptDocs);
       }
 
       postingsFreqs[pos] = new PhraseQuery.PostingsAndFreq(postingsEnum, factory, termsEnum.docFreq() , positions.get(pos).intValue(), terms);
@@ -393,6 +392,27 @@ public class MultiPhraseQuery extends Query {
     }
     return true;
   }
+
+  private static class MultiTermDocsEnumFactory extends TermQuery.TermDocsEnumFactory {
+
+    AtomicReaderContext context;
+    Term[] terms;
+    Map<Term,TermContext> termContexts;
+
+    MultiTermDocsEnumFactory(Bits liveDocs, AtomicReaderContext context, Term[] terms,
+                             Map<Term,TermContext> termContexts, TermsEnum termsEnum) throws IOException {
+      super(termsEnum, liveDocs);
+      this.context = context;
+      this.terms = terms;
+      this.termContexts = termContexts;
+    }
+
+    @Override
+    public DocsAndPositionsEnum docsAndPositionsEnum(boolean offsets) throws IOException {
+      return new UnionDocsAndPositionsEnum(liveDocs, context, terms, termContexts, termsEnum, offsets);
+    }
+
+  }
 }
 
 /**
@@ -464,7 +484,13 @@ class UnionDocsAndPositionsEnum extends DocsAndPositionsEnum {
   private DocsQueue _queue;
   private IntQueue _posList;
 
-  public UnionDocsAndPositionsEnum(Bits liveDocs, AtomicReaderContext context, Term[] terms, Map<Term,TermContext> termContexts, TermsEnum termsEnum) throws IOException {
+  public UnionDocsAndPositionsEnum(Bits liveDocs, AtomicReaderContext context, Term[] terms,
+      Map<Term,TermContext> termContexts, TermsEnum termsEnum) throws IOException {
+    this(liveDocs, context, terms, termContexts, termsEnum, false);
+  }
+
+  public UnionDocsAndPositionsEnum(Bits liveDocs, AtomicReaderContext context, Term[] terms,
+      Map<Term,TermContext> termContexts, TermsEnum termsEnum, boolean needsOffsets) throws IOException {
     List<DocsAndPositionsEnum> docsEnums = new LinkedList<DocsAndPositionsEnum>();
     for (int i = 0; i < terms.length; i++) {
       final Term term = terms[i];
@@ -474,7 +500,7 @@ class UnionDocsAndPositionsEnum extends DocsAndPositionsEnum {
         continue;
       }
       termsEnum.seekExact(term.bytes(), termState);
-      DocsAndPositionsEnum postings = termsEnum.docsAndPositions(liveDocs, null, false);
+      DocsAndPositionsEnum postings = termsEnum.docsAndPositions(liveDocs, null, needsOffsets);
       if (postings == null) {
         // term does exist, but has no positions
         throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + term.text() + ")");
diff --git a/lucene/core/src/java/org/apache/lucene/search/PhrasePositions.java b/lucene/core/src/java/org/apache/lucene/search/PhrasePositions.java
index b2d4afe..dc2ea00 100644
--- a/lucene/core/src/java/org/apache/lucene/search/PhrasePositions.java
+++ b/lucene/core/src/java/org/apache/lucene/search/PhrasePositions.java
@@ -17,8 +17,10 @@ package org.apache.lucene.search;
  * limitations under the License.
  */
 
+import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.index.Term;
+
 import java.io.IOException;
-import org.apache.lucene.index.*;
 
 /**
  * Position of a term in a document that takes into account the term offset within the phrase.
@@ -84,6 +86,10 @@ final class PhrasePositions {
     if (rptGroup >=0 ) {
       s += " rpt:"+rptGroup+",i"+rptInd;
     }
+    s += " t: [" + terms[0];
+    for (int i = 1; i < terms.length; i++)
+      s += "," + terms[i];
+    s += "]";
     return s;
   }
 }
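Reviewer sketch (not part of the patch): the multi-term branch no longer leaves the factory null — MultiTermDocsEnumFactory can rebuild the union enum on demand, and the new needsOffsets flag is forwarded to every per-term docsAndPositions() call. A small sketch of constructing the union directly, assuming the caller already holds the seeked TermsEnum and the per-term TermContexts:

    // sketch: open the union over several terms, asking the sub-enums for offsets up front
    static UnionDocsAndPositionsEnum openUnion(Bits liveDocs, AtomicReaderContext context,
        Term[] terms, Map<Term,TermContext> termContexts, TermsEnum termsEnum) throws IOException {
      // true is simply handed down to termsEnum.docsAndPositions(liveDocs, null, true) per term
      return new UnionDocsAndPositionsEnum(liveDocs, context, terms, termContexts, termsEnum, true);
    }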
diff --git a/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java b/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java
index 1f0c1f4..5dd0904 100644
--- a/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java
@@ -124,7 +124,7 @@ public class PhraseQuery extends Query {
   static class PostingsAndFreq implements Comparable<PostingsAndFreq> {
     final TermQuery.TermDocsEnumFactory factory;
-    final DocsAndPositionsEnum postings;
+    DocsAndPositionsEnum postings;
     final int docFreq;
     final int position;
     final Term[] terms;
@@ -132,7 +132,8 @@ public class PhraseQuery extends Query {
     public PostingsAndFreq(DocsAndPositionsEnum postings, TermQuery.TermDocsEnumFactory factory, int docFreq, int position, Term... terms) throws IOException {
       this.factory = factory;
-      this.postings = postings;
+      //this.postings = postings;
+      this.postings = factory.docsAndPositionsEnum(false);
       this.docFreq = docFreq;
       this.position = position;
       nTerms = terms==null ? 0 : terms.length;
@@ -150,6 +151,10 @@ public class PhraseQuery extends Query {
       }
     }
 
+    public void reset(boolean needsOffsets) throws IOException {
+      this.postings = factory.docsAndPositionsEnum(needsOffsets);
+    }
+
     public int compareTo(PostingsAndFreq other) {
       if (docFreq != other.docFreq) {
         return docFreq - other.docFreq;
@@ -263,7 +268,7 @@ public class PhraseQuery extends Query {
           // term does exist, but has no positions
           throw new IllegalStateException("field \"" + t.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + t.text() + ")");
         }
-        TermQuery.TermDocsEnumFactory factory = new TermQuery.TermDocsEnumFactory(BytesRef.deepCopyOf(t.bytes()), state, te, null, null, acceptDocs);
+        TermQuery.TermDocsEnumFactory factory = new TermQuery.TermDocsEnumFactory(BytesRef.deepCopyOf(t.bytes()), te, acceptDocs);
         postingsFreqs[i] = new PostingsAndFreq(postingsEnum, factory, te.docFreq(), positions.get(i).intValue(), t);
       }
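Reviewer sketch (not part of the patch): PostingsAndFreq now always obtains its postings from the factory — without offsets in the constructor — and reset(true) swaps in an offsets-capable enum later. A sketch of how a scorer could upgrade its clauses before walking offsets (the helper name is hypothetical):

    // sketch: re-pull every clause's postings so startOffset()/endOffset() become available
    static void upgradeToOffsets(PhraseQuery.PostingsAndFreq[] postingsFreqs) throws IOException {
      for (PhraseQuery.PostingsAndFreq pf : postingsFreqs) {
        pf.reset(true);   // replaces pf.postings with factory.docsAndPositionsEnum(true)
      }
    }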
diff --git a/lucene/core/src/java/org/apache/lucene/search/PhraseScorer.java b/lucene/core/src/java/org/apache/lucene/search/PhraseScorer.java
index 19644b3..8389262 100644
--- a/lucene/core/src/java/org/apache/lucene/search/PhraseScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/PhraseScorer.java
@@ -17,10 +17,10 @@ package org.apache.lucene.search;
  * limitations under the License.
  */
 
-import java.io.IOException;
-
 import org.apache.lucene.search.similarities.Similarity;
+import java.io.IOException;
+
 /** Expert: Scoring functionality for phrase queries.
  * <br>A document is considered matching if it contains the phrase-query terms
  * at "valid" positions. What "valid positions" are
@@ -37,22 +37,29 @@ abstract class PhraseScorer extends Scorer {
   private float freq; //phrase frequency in current doc as computed by phraseFreq().
 
   final Similarity.SloppySimScorer docScorer;
+  private PhraseQuery.PostingsAndFreq[] postings;
 
   PhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
-      Similarity.SloppySimScorer docScorer) {
+      Similarity.SloppySimScorer docScorer) throws IOException {
     super(weight);
     this.docScorer = docScorer;
+    this.postings = postings;
+    reset(false);
+  }
 
+  void reset(boolean needsOffsets) throws IOException {
     // convert tps to a list of phrase positions.
     // note: phrase-position differs from term-position in that its position
    // reflects the phrase offset: pp.pos = tp.pos - offset.
     // this allows to easily identify a matching (exact) phrase
     // when all PhrasePositions have exactly the same position.
     if (postings.length > 0) {
+      postings[0].reset(needsOffsets);
       min = new PhrasePositions(postings[0].postings, postings[0].position, 0, postings[0].terms);
       max = min;
       max.doc = -1;
       for (int i = 1; i < postings.length; i++) {
+        postings[i].reset(needsOffsets);
         PhrasePositions pp = new PhrasePositions(postings[i].postings, postings[i].position, i, postings[i].terms);
         max.next = pp;
         max = pp;
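Reviewer sketch (not part of the patch): reset(needsOffsets) rebuilds the min..max linked list of PhrasePositions from the freshly reset postings. That list is later walked with the prev/next idiom used in SloppyPhraseScorer; a tiny sketch of the idiom, only to make the loop shape easier to read (the helper is hypothetical):

    // sketch: visit every PhrasePositions in the chain built by reset()
    static int chainLength(PhrasePositions min, PhrasePositions max) {
      int n = 0;
      for (PhrasePositions pp = min, prev = null; prev != max; pp = (prev = pp).next) {
        n++;
      }
      return n;
    }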
diff --git a/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java b/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java
index c02007b..4a8e92e 100644
--- a/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java
@@ -37,10 +37,11 @@ final class SloppyPhraseScorer extends PhraseScorer {
   private boolean checkedRpts; // flag to only check for repetitions in first candidate doc
   private boolean hasMultiTermRpts; //
   private PhrasePositions[][] rptGroups; // in each group are PPs that repeats each other (i.e. same term), sorted by (query) offset
-  private PhrasePositions[] rptStack; // temporary stack for switching colliding repeating pps
+  private PhrasePositions[] rptStack; // temporary stack for switching colliding repeating pps
+  PhrasePositionsIterator ppIt = new PhrasePositionsIterator();
 
   SloppyPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
-      int slop, Similarity.SloppySimScorer docScorer) {
+      int slop, Similarity.SloppySimScorer docScorer) throws IOException {
     super(weight, postings, docScorer);
     this.slop = slop;
     this.numPostings = postings==null ? 0 : postings.length;
@@ -71,32 +72,56 @@ final class SloppyPhraseScorer extends PhraseScorer {
       return 0.0f;
     }
     float freq = 0.0f;
-    PhrasePositions pp = pq.pop();
-    int matchLength = end - pp.position;
-    int next = pq.top().position;
-    while (advancePP(pp)) {
-      if (hasRpts && !advanceRpts(pp)) {
-        break; // pps exhausted
-      }
-      if (pp.position > next) { // done minimizing current match-length
-        if (matchLength <= slop) {
-          freq += docScorer.computeSlopFactor(matchLength); // score match
-        }
-        pq.add(pp);
-        pp = pq.pop();
-        next = pq.top().position;
-        matchLength = end - pp.position;
-      } else {
-        int matchLength2 = end - pp.position;
-        if (matchLength2 < matchLength) {
-          matchLength = matchLength2;
+
+    while (ppIt.next()) {
+      freq += docScorer.computeSlopFactor(ppIt.matchlength);
+    }
+
+    return freq;
+  }
+
+  private class PhrasePositionsIterator {
+
+    public int matchlength;
+    public int headPosition = -1;
+    public int headStartOffset = 1;
+    public int headEndOffset = -1;
+    private PhrasePositions currentHead;
+
+    public boolean next() throws IOException {
+      if (pq.size() < numPostings)
+        return false;
+      currentHead = pq.pop();
+      matchlength = end - currentHead.position;
+      headPosition = currentHead.position + currentHead.offset;
+      headStartOffset = currentHead.postings.startOffset();
+      headEndOffset = currentHead.postings.endOffset();
+      int next = pq.top().position;
+      while (advancePP(currentHead)) {
+        if (hasRpts && !advanceRpts(currentHead))
+          break;
+        if (currentHead.position > next) { // done minimizing current match-length
+          pq.add(currentHead);
+          if (matchlength <= slop) {
+            return true;
+          }
+          // not a match - keep going
+          currentHead = pq.pop();
+          next = pq.top().position;
+          matchlength = end - currentHead.position;
+        }
+        else {
+          int newmatchlength = end - currentHead.position;
+          if (newmatchlength < matchlength)
+            matchlength = newmatchlength;
        }
+        headPosition = currentHead.position + currentHead.offset;
+        headStartOffset = currentHead.postings.startOffset();
+        headEndOffset = currentHead.postings.endOffset();
      }
+      return matchlength <= slop;
    }
-    if (matchLength <= slop) {
-      freq += docScorer.computeSlopFactor(matchLength); // score match
-    }
-    return freq;
+
   }
 
   /** advance a PhrasePosition and update 'end', return false if exhausted */
@@ -481,8 +506,80 @@ final class SloppyPhraseScorer extends PhraseScorer {
 
   @Override
   public PositionIntervalIterator positions(boolean needsPayloads, boolean needsOffsets) throws IOException {
-    // nocommit implement this (and get a beer before you do so!)
-    throw new UnsupportedOperationException();
+    // nocommit - payloads?
+    reset(needsOffsets);
+    return new SloppyPhrasePositionIntervalIterator(this);
+  }
+
+  private class SloppyPhrasePositionIntervalIterator extends PositionIntervalIterator {
+
+    private PositionInterval interval = new PositionInterval(Integer.MIN_VALUE, Integer.MIN_VALUE, -1, -1);
+    private PositionInterval[] subintervals;
+
+    public SloppyPhrasePositionIntervalIterator(Scorer scorer) {
+      super(scorer);
+      subintervals = new PositionInterval[numPostings];
+      for (int i = 0; i < numPostings; i++) {
+        subintervals[i] = new PositionInterval(Integer.MIN_VALUE, Integer.MIN_VALUE, -1, -1);
+      }
+    }
+
+    @Override
+    public int advanceTo(int docId) throws IOException {
+      int currentdoc = -1;
+      for (PhrasePositions pp=min,prev=null; prev!=max; pp=(prev=pp).next) {
+        currentdoc = pp.postings.advance(docId);
+      }
+      initPhrasePositions();
+      return currentdoc;
+    }
+
+    @Override
+    public PositionInterval next() throws IOException {
+      if (!ppIt.next())
+        return null;
+      fillInterval(interval, ppIt, max);
+      fillInterval(subintervals[0], ppIt);
+      int i = 1;
+      for (PhrasePositions pp=min.next,prev=null; prev != max; pp=(prev=pp).next, i++) {
+        fillInterval(subintervals[i], pp);
+      }
+      return interval;
+    }
+
+    @Override
+    public void collect() {
+      collector.collectComposite(scorer, interval, currentDoc);
+      int i = 0;
+      for (PhrasePositions pp=min,prev=null; prev != max; pp=(prev=pp).next, i++) {
+        collector.collectLeafPosition(scorer, subintervals[i], currentDoc);
+      }
+    }
+
+    @Override
+    public PositionIntervalIterator[] subs(boolean inOrder) {
+      return EMPTY;
+    }
+
+    private void fillInterval(PositionInterval i, PhrasePositionsIterator it) {
+      i.begin = it.headPosition;
+      i.end = it.headPosition;
+      i.offsetBegin = it.headStartOffset;
+      i.offsetEnd = it.headEndOffset;
+    }
+
+    private void fillInterval(PositionInterval i, PhrasePositionsIterator it, PhrasePositions end) throws IOException {
+      i.begin = it.headPosition;
+      i.end = end.position + end.offset;
+      i.offsetBegin = it.headStartOffset;
+      i.offsetEnd = end.postings.endOffset();
+    }
+
+    private void fillInterval(PositionInterval i, PhrasePositions pp) throws IOException {
+      i.end = i.begin = pp.offset + pp.position;
+      i.offsetBegin = pp.postings.startOffset();
+      i.offsetEnd = pp.postings.endOffset();
+    }
   }
 
//  private void printQueue(PrintStream ps, PhrasePositions ext, String title) {
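Reviewer sketch (not part of the patch): phraseFreq() and the new positions() implementation now share PhrasePositionsIterator, so a sloppy match can be scored and reported as an interval from the same state. A consumer-side sketch of the iterator contract, mirroring what the tests below do; error handling and NO_MORE_DOCS checks are trimmed, and a match in the first document is assumed:

    // sketch: walk sloppy-phrase matches of the current document and print their offsets
    static void printMatchOffsets(Scorer scorer) throws IOException {
      int doc = scorer.nextDoc();                                   // assume a matching doc exists
      PositionIntervalIterator it = scorer.positions(false, true);  // payloads=false, offsets=true
      it.advanceTo(doc);
      PositionIntervalIterator.PositionInterval interval;
      while ((interval = it.next()) != null) {
        System.out.println(interval.begin + "-" + interval.end
            + " @ [" + interval.offsetBegin + "," + interval.offsetEnd + "]");
      }
    }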
diff --git a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java
index ebc4458..0d0784d 100644
--- a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java
@@ -17,25 +17,14 @@ package org.apache.lucene.search;
  * limitations under the License.
  */
 
+import org.apache.lucene.index.*;
+import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.search.similarities.Similarity.ExactSimScorer;
+import org.apache.lucene.util.*;
+
 import java.io.IOException;
 import java.util.Set;
 
-import org.apache.lucene.index.AtomicReaderContext;
-import org.apache.lucene.index.DocsAndPositionsEnum;
-import org.apache.lucene.index.DocsEnum;
-import org.apache.lucene.index.AtomicReader;
-import org.apache.lucene.index.IndexReaderContext;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermState;
-import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.search.similarities.Similarity.ExactSimScorer;
-import org.apache.lucene.search.similarities.Similarity;
-import org.apache.lucene.util.Bits;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.ReaderUtil;
-import org.apache.lucene.util.TermContext;
-import org.apache.lucene.util.ToStringUtils;
-
 /**
  * A Query that matches documents containing a term. This may be combined with
  * other terms with a {@link BooleanQuery}.
@@ -94,7 +83,7 @@ public class TermQuery extends Query {
       }
       DocsEnum docs = termsEnum.docs(acceptDocs, null, true);
       if (docs != null) {
-        return new TermScorer(this, new TermDocsEnumFactory(termsEnum, docs, docs, acceptDocs), createDocScorer(context));
+        return new TermScorer(this, docs, new TermDocsEnumFactory(termsEnum, acceptDocs), createDocScorer(context));
       } else {
         // Index does not store freq info
         docs = termsEnum.docs(acceptDocs, null, false);
@@ -248,43 +237,41 @@ public class TermQuery extends Query {
   }
 
   static class TermDocsEnumFactory {
-    private final TermsEnum termsEnum;
-    private final Bits liveDocs;
-    private final DocsEnum docs;
-    private final DocsEnum docsAndFreqs;
-    private final TermState state;
-    private BytesRef term;
+    protected final TermsEnum termsEnum;
+    protected final Bits liveDocs;
+    //private final DocsEnum docs;
+    //private final DocsEnum docsAndFreqs;
+    //private final TermState state;
+    protected BytesRef term;
 
-    TermDocsEnumFactory(TermsEnum termsEnum, DocsEnum docs, DocsEnum docsAndFreqs, Bits liveDocs) {
-      this(null, null, termsEnum, docs, docsAndFreqs, liveDocs);
+    TermDocsEnumFactory(TermsEnum termsEnum, Bits liveDocs) {
+      this(null, termsEnum, liveDocs);
     }
 
-    TermDocsEnumFactory(BytesRef term, TermState state, TermsEnum termsEnum,
-        DocsEnum docs, DocsEnum docsAndFreqs, Bits liveDocs) {
+    TermDocsEnumFactory(BytesRef term, TermsEnum termsEnum, Bits liveDocs) {
       this.termsEnum = termsEnum;
      this.liveDocs = liveDocs;
-      this.docs = docs;
-      this.docsAndFreqs = docsAndFreqs;
-      this.state = state;
+      //this.docs = docs;
+      //this.docsAndFreqs = docsAndFreqs;
+      //this.state = state;
      this.term = term;
    }
 
+    /*
    public DocsEnum docsEnum() throws IOException {
      return docs;
    }
+    */
 
    public DocsAndPositionsEnum docsAndPositionsEnum(boolean offsets) throws IOException {
-      if (state != null) {
+      if (term != null) {
        assert term != null;
-        termsEnum.seekExact(term, state);
+        termsEnum.seekExact(term, false);
      }
      return termsEnum.docsAndPositions(liveDocs, null, offsets);
    }
-
-    public DocsEnum docsAndFreqsEnum() throws IOException{
-      return docsAndFreqs;
-    }
+
  }
 }
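Reviewer sketch (not part of the patch): the slimmed-down TermDocsEnumFactory keeps only the TermsEnum, the live docs, and optionally a deep copy of the term bytes; when positions are requested it re-seeks by those bytes instead of using the dropped TermState. A sketch of how PhraseQuery builds such a factory per term, and why the copy matters:

    // sketch: one factory per phrase term; the bytes are deep-copied because the shared
    // TermsEnum will be re-seeked to the next term before this factory is used again
    static TermQuery.TermDocsEnumFactory factoryFor(Term t, TermsEnum te, Bits acceptDocs) {
      return new TermQuery.TermDocsEnumFactory(BytesRef.deepCopyOf(t.bytes()), te, acceptDocs);
    }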
diff --git a/lucene/core/src/java/org/apache/lucene/search/TermScorer.java b/lucene/core/src/java/org/apache/lucene/search/TermScorer.java
index 16aa231..98db23e 100644
--- a/lucene/core/src/java/org/apache/lucene/search/TermScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/TermScorer.java
@@ -45,10 +45,10 @@ final class TermScorer extends Scorer {
    *          The Similarity.ExactSimScorer implementation
    *          to be used for score computations.
    */
-  TermScorer(Weight weight, TermDocsEnumFactory factory, Similarity.ExactSimScorer docScorer) throws IOException {
+  TermScorer(Weight weight, DocsEnum docsEnum, TermDocsEnumFactory factory, Similarity.ExactSimScorer docScorer) throws IOException {
     super(weight);
     this.docScorer = docScorer;
-    this.docsEnum = factory.docsAndFreqsEnum();
+    this.docsEnum = docsEnum;
     this.factory = factory;
   }
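Reviewer sketch (not part of the patch): TermScorer mirrors the ConjunctionTermScorer change — the weight hands in the DocsEnum it already pulled and the factory is kept purely for positions. A sketch of the new construction path, with the surrounding weight/similarity setup assumed:

    // sketch: construct a TermScorer the way TermQuery's weight now does
    static TermScorer scorerFor(Weight weight, TermsEnum termsEnum, Bits acceptDocs,
        Similarity.ExactSimScorer docScorer) throws IOException {
      DocsEnum docs = termsEnum.docs(acceptDocs, null, true);   // true: frequencies requested
      return new TermScorer(weight, docs, new TermDocsEnumFactory(termsEnum, acceptDocs), docScorer);
    }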
diff --git a/lucene/core/src/test/org/apache/lucene/search/positions/TestPositionOffsets.java b/lucene/core/src/test/org/apache/lucene/search/positions/TestPositionOffsets.java
index 7d7545b..c9170a3 100644
--- a/lucene/core/src/test/org/apache/lucene/search/positions/TestPositionOffsets.java
+++ b/lucene/core/src/test/org/apache/lucene/search/positions/TestPositionOffsets.java
@@ -16,8 +16,6 @@ package org.apache.lucene.search.positions;
  * limitations under the License.
  */
 
-import java.io.IOException;
-
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.codecs.Codec;
 import org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat;
@@ -27,23 +25,14 @@ import org.apache.lucene.codecs.pulsing.Pulsing40PostingsFormat;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.TextField;
-import org.apache.lucene.index.AtomicReaderContext;
-import org.apache.lucene.index.FieldInfo;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexReaderContext;
-import org.apache.lucene.index.IndexWriterConfig;
-import org.apache.lucene.index.RandomIndexWriter;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.search.BooleanClause;
-import org.apache.lucene.search.BooleanQuery;
-import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.Scorer;
-import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.search.Weight;
+import org.apache.lucene.index.*;
+import org.apache.lucene.search.*;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util._TestUtil;
+import org.junit.Ignore;
+
+import java.io.IOException;
 
 public class TestPositionOffsets extends LuceneTestCase {
 
@@ -92,15 +81,18 @@ public class TestPositionOffsets extends LuceneTestCase {
     writer.addDocument(doc);
   }
 
-  public void testTermQueryWithOffsets() throws IOException {
+  private void testQuery(Query query, int[][] expectedOffsets) throws IOException {
+    testQuery(query, expectedOffsets, true);
+  }
+
+  private void testQuery(Query query, int[][] expectedOffsets, boolean needsOffsets) throws IOException {
     Directory directory = newDirectory();
     RandomIndexWriter writer = new RandomIndexWriter(random(), directory, iwc);
-    addDocs(writer, true);
+    addDocs(writer, needsOffsets);
 
     IndexReader reader = writer.getReader();
     IndexSearcher searcher = new IndexSearcher(reader);
     writer.close();
-    Query query = new TermQuery(new Term("field", "porridge"));
 
     Weight weight = query.createWeight(searcher);
     IndexReaderContext topReaderContext = searcher.getTopReaderContext();
@@ -111,9 +103,10 @@
 
     int nextDoc = scorer.nextDoc();
     assertEquals(0, nextDoc);
-    PositionIntervalIterator positions = scorer.positions(false, true);
-    int[] startOffsets = new int[] { 6, 26, 47, 164, 184 };
-    int[] endOffsets = new int[] { 14, 34, 55, 172, 192 };
+    PositionIntervalIterator positions = scorer.positions(false, needsOffsets);
+
+    int startOffsets[] = expectedOffsets[0];
+    int endOffsets[] = expectedOffsets[1];
 
     assertEquals(0, positions.advanceTo(nextDoc));
     for (int i = 0; i < startOffsets.length; i++) {
@@ -128,79 +121,69 @@
     directory.close();
   }
 
-  public void testTermQueryWithoutOffsets() throws IOException {
-    Directory directory = newDirectory();
-    RandomIndexWriter writer = new RandomIndexWriter(random(), directory, iwc);
-    addDocs(writer, false);
-
-    IndexReader reader = writer.getReader();
-    IndexSearcher searcher = new IndexSearcher(reader);
-    writer.close();
+  public void testTermQueryWithOffsets() throws IOException {
     Query query = new TermQuery(new Term("field", "porridge"));
+    int[] startOffsets = new int[] { 6, 26, 47, 164, 184 };
+    int[] endOffsets = new int[] { 14, 34, 55, 172, 192 };
+    testQuery(query, new int[][] { startOffsets, endOffsets });
+  }
 
-    Weight weight = query.createWeight(searcher);
-    IndexReaderContext topReaderContext = searcher.getTopReaderContext();
-    AtomicReaderContext[] leaves = topReaderContext.leaves();
-    assertEquals(1, leaves.length);
-    Scorer scorer = weight.scorer(leaves[0],
-        true, true, leaves[0].reader().getLiveDocs());
-
-    int nextDoc = scorer.nextDoc();
-    assertEquals(0, nextDoc);
-    PositionIntervalIterator positions = scorer.positions(false, false);
+  public void testTermQueryWithoutOffsets() throws IOException {
+    Query query = new TermQuery(new Term("field", "porridge"));
     int[] startOffsets = new int[] { -1, -1, -1, -1, -1 };
     int[] endOffsets = new int[] { -1, -1, -1, -1, -1 };
-
-    assertEquals(0, positions.advanceTo(nextDoc));
-    for (int i = 0; i < startOffsets.length; i++) {
-      PositionIntervalIterator.PositionInterval interval = positions.next();
-      assertEquals(startOffsets[i], interval.offsetBegin);
-      assertEquals(endOffsets[i], interval.offsetEnd);
-    }
-
-    assertNull(positions.next());
-
-    reader.close();
-    directory.close();
+    testQuery(query, new int[][] { startOffsets, endOffsets }, false);
   }
 
   public void testBooleanQueryWithOffsets() throws IOException {
-    Directory directory = newDirectory();
-    RandomIndexWriter writer = new RandomIndexWriter(random(), directory, iwc);
-    addDocs(writer, true);
-
-    IndexReader reader = writer.getReader();
-    IndexSearcher searcher = new IndexSearcher(reader);
-    writer.close();
     BooleanQuery query = new BooleanQuery();
     query.add(new BooleanClause(new TermQuery(new Term("field", "porridge")), BooleanClause.Occur.MUST));
     query.add(new BooleanClause(new TermQuery(new Term("field", "nine")), BooleanClause.Occur.MUST));
-
-    Weight weight = query.createWeight(searcher);
-    IndexReaderContext topReaderContext = searcher.getTopReaderContext();
-    AtomicReaderContext[] leaves = topReaderContext.leaves();
-    assertEquals(1, leaves.length);
-    Scorer scorer = weight.scorer(leaves[0],
-        true, true, leaves[0].reader().getLiveDocs());
-
-    int nextDoc = scorer.nextDoc();
-    assertEquals(0, nextDoc);
-    PositionIntervalIterator positions = scorer.positions(false, true);
     int[] startOffsetsConj = new int[] { 6, 26, 47, 67, 143};
     int[] endOffsetsConj = new int[] { 71, 71, 71, 172, 172};
-    assertEquals(0, positions.advanceTo(nextDoc));
-    PositionIntervalIterator.PositionInterval interval;
-    int i = 0;
-    while((interval = positions.next()) != null) {
-      assertEquals(startOffsetsConj[i], interval.offsetBegin);
-      assertEquals(endOffsetsConj[i], interval.offsetEnd);
-      i++;
-    }
-    assertEquals(i, startOffsetsConj.length);
-    assertNull(positions.next());
+    testQuery(query, new int[][] { startOffsetsConj, endOffsetsConj });
+  }
 
-    reader.close();
-    directory.close();
+  public void testExactPhraseQuery() throws IOException {
+    PhraseQuery query = new PhraseQuery();
+    query.add(new Term("field", "pease"));
+    query.add(new Term("field", "porridge"));
+    query.add(new Term("field", "hot!"));
+    int[] startOffsetsBlock = new int[] { 0, 158 };
+    int[] endOffsetsBlock = new int[] { 19, 177 };
+    testQuery(query, new int[][] { startOffsetsBlock, endOffsetsBlock });
+  }
+
+  public void testSloppyPhraseQuery() throws IOException {
+    PhraseQuery query = new PhraseQuery();
+    query.add(new Term("field", "pease"));
+    query.add(new Term("field", "hot!"));
+    query.setSlop(1);
+    int[] startOffsetsBlock = new int[] { 0, 158 };
+    int[] endOffsetsBlock = new int[] { 19, 177 };
+    testQuery(query, new int[][] { startOffsetsBlock, endOffsetsBlock });
+  }
+
+  public void testManyTermSloppyPhraseQuery() throws IOException {
+    PhraseQuery query = new PhraseQuery();
+    query.add(new Term("field", "pease"));
+    query.add(new Term("field", "porridge"));
+    query.add(new Term("field", "pot"));
+    query.setSlop(2);
+    int[] startOffsetsBlock = new int[] { 41 };
+    int[] endOffsetsBlock = new int[] { 66 };
+    testQuery(query, new int[][] { startOffsetsBlock, endOffsetsBlock });
+  }
+
+  @Ignore
+  public void testMultiTermPhraseQuery() throws IOException {
+    MultiPhraseQuery query = new MultiPhraseQuery();
+    query.add(new Term("field", "pease"));
+    query.add(new Term("field", "porridge"));
+    query.add(new Term[] { new Term("field", "hot!"), new Term("field", "cold!")});
+    int[] startOffsetsBlock = new int[] { 0, 21, 158, 179 };
+    int[] endOffsetsBlock = new int[] { 19, 41, 177, 199 };
+    testQuery(query, new int[][] { startOffsetsBlock, endOffsetsBlock });
   }
 }
\ No newline at end of file
diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/poshighlight/PosHighlighterTest.java b/lucene/highlighter/src/test/org/apache/lucene/search/poshighlight/PosHighlighterTest.java
index 434b0af..e22aac1 100644
--- a/lucene/highlighter/src/test/org/apache/lucene/search/poshighlight/PosHighlighterTest.java
+++ b/lucene/highlighter/src/test/org/apache/lucene/search/poshighlight/PosHighlighterTest.java
@@ -368,6 +368,17 @@ public class PosHighlighterTest extends LuceneTestCase {
         frags[0]);
     close();
   }
+
+  public void testSloppyPhraseQuery() throws Exception {
+    insertDocs(analyzer, "a b c d a b c d e f");
+    PhraseQuery pq = new PhraseQuery();
+    pq.add(new Term(F, "c"));
+    pq.add(new Term(F, "a"));
+    pq.setSlop(2);
+    String frags[] = doSearch(pq, 50);
+    assertEquals("a b c d a b c d e f", frags[0]);
+    close();
+  }
 
   public static class BlockPositionIteratorFilter implements PositionIntervalFilter {