Index: CHANGES.txt =================================================================== --- CHANGES.txt (revision 1065016) +++ CHANGES.txt (working copy) @@ -243,6 +243,10 @@ that warming is free to do whatever it needs to. (Earwin Burrfoot via Mike McCandless) +* LUCENE-3029: Fix corner case when MultiPhraseQuery is used with zero + position-increment tokens that would sometimes assign different + scores to identical docs. (Mike McCandless) + * LUCENE-2486: Fixed intermittent FileNotFoundException on doc store files when a mergedSegmentWarmer is set on IndexWriter. (Mike McCandless) Index: src/test/org/apache/lucene/search/TestMultiPhraseQuery.java =================================================================== --- src/test/org/apache/lucene/search/TestMultiPhraseQuery.java (revision 1065016) +++ src/test/org/apache/lucene/search/TestMultiPhraseQuery.java (working copy) @@ -17,24 +17,29 @@ * limitations under the License. */ -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermEnum; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.store.RAMDirectory; +import java.io.IOException; +import java.io.Reader; +import java.util.Collections; +import java.util.LinkedList; + +import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.SimpleAnalyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; - +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermEnum; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.util.LuceneTestCase; -import java.io.IOException; -import java.util.HashSet; -import java.util.LinkedList; -import java.util.Collections; - /** * This class tests the MultiPhraseQuery class. * @@ -228,4 +233,95 @@ writer.addDocument(doc); } + private static class TokenAndPos { + public final String token; + public final int pos; + public TokenAndPos(String token, int pos) { + this.token = token; + this.pos = pos; + } + } + + private static class CannedAnalyzer extends Analyzer { + private final TokenAndPos[] tokens; + + public CannedAnalyzer(TokenAndPos[] tokens) { + this.tokens = tokens; + } + + @Override + public TokenStream tokenStream(String fieldName, Reader reader) { + return new CannedTokenizer(tokens); + } + } + + private static class CannedTokenizer extends Tokenizer { + private final TokenAndPos[] tokens; + private int upto = 0; + private int lastPos = 0; + private final TermAttribute termAtt = addAttribute(TermAttribute.class); + private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class); + + public CannedTokenizer(TokenAndPos[] tokens) { + this.tokens = tokens; + } + + @Override + public final boolean incrementToken() throws IOException { + clearAttributes(); + if (upto < tokens.length) { + final TokenAndPos token = tokens[upto++]; + termAtt.setTermBuffer(token.token); + posIncrAtt.setPositionIncrement(token.pos - lastPos); + lastPos = token.pos; + return true; + } else { + return false; + } + } + } + + public void testZeroPosIncr() throws IOException { + Directory dir = new RAMDirectory(); + final TokenAndPos[] tokens = new TokenAndPos[3]; + tokens[0] = new TokenAndPos("a", 0); + tokens[1] = new TokenAndPos("b", 0); + tokens[2] = new TokenAndPos("c", 0); + + IndexWriter writer = new IndexWriter(dir, new CannedAnalyzer(tokens), true, IndexWriter.MaxFieldLength.LIMITED); + Document doc = new Document(); + doc.add(new Field("field", "", Field.Store.NO, Field.Index.ANALYZED)); + writer.addDocument(doc); + writer.addDocument(doc); + IndexReader r = writer.getReader(); + writer.close(); + IndexSearcher s = new IndexSearcher(r); + MultiPhraseQuery mpq = new MultiPhraseQuery(); + //mpq.setSlop(1); + + // NOTE: not great that if we do the else clause here we + // get different scores! MultiPhraseQuery counts that + // phrase as occurring twice per doc (it should be 1, I + // think?). This is because MultipleTermPositions is able to + // return the same position more than once (0, in this + // case): + if (true) { + mpq.add(new Term[] {new Term("field", "b"), new Term("field", "c")}, 0); + mpq.add(new Term[] {new Term("field", "a")}, 0); + } else { + mpq.add(new Term[] {new Term("field", "a")}, 0); + mpq.add(new Term[] {new Term("field", "b"), new Term("field", "c")}, 0); + } + TopDocs hits = s.search(mpq, 2); + assert hits.totalHits == 2; + assertEquals(hits.scoreDocs[0].score, hits.scoreDocs[1].score, 1e-5); + /* + for(int hit=0;hit