Index: src/test/org/apache/lucene/search/TestPhraseQuery.java =================================================================== --- src/test/org/apache/lucene/search/TestPhraseQuery.java (revision 480218) +++ src/test/org/apache/lucene/search/TestPhraseQuery.java (working copy) @@ -35,6 +35,10 @@ * @author Erik Hatcher */ public class TestPhraseQuery extends TestCase { + + /** threshold for comparing floats */ + public static final float SCORE_COMP_THRESH = 1e-6f; + private IndexSearcher searcher; private PhraseQuery query; private RAMDirectory directory; @@ -57,6 +61,7 @@ doc.add(new Field("repeated", "this is a repeated field - first part", Field.Store.YES, Field.Index.TOKENIZED)); Fieldable repeatedField = new Field("repeated", "second part of a repeated field", Field.Store.YES, Field.Index.TOKENIZED); doc.add(repeatedField); + doc.add(new Field("palindrome", "one two three two one", Field.Store.YES, Field.Index.TOKENIZED)); writer.addDocument(doc); writer.optimize(); @@ -341,12 +346,144 @@ query.add(new Term("repeated", "part")); query.add(new Term("repeated", "second")); query.add(new Term("repeated", "part")); + query.setSlop(100); + + Hits hits = searcher.search(query); + assertEquals("slop of 100 just right", 1, hits.length()); + QueryUtils.check(query,searcher); + query.setSlop(99); + hits = searcher.search(query); + assertEquals("slop of 99 not enough", 0, hits.length()); + QueryUtils.check(query,searcher); + } + + public void testNonExistingWrappedPhrase() throws IOException { + query.add(new Term("field", "three")); + query.add(new Term("field", "four")); + query.add(new Term("field", "three")); + query.setSlop(1000); // no matter what slop we set, phrase ain't there + Hits hits = searcher.search(query); - assertEquals(0, hits.length()); + assertEquals("unexisting wrapped phrase does not exist in doc", 0, hits.length()); QueryUtils.check(query,searcher); + } + /** + * Phrase of size 2 occurring twice, once in order and once in reverse, + * because doc 
is a palindrome, is counted twice. + * Also, in this case order in query does not matter. + * Also, when an exact match is found, both sloppy scorer and exact scorer score the same. + */ + public void testPalyndrome2() throws Exception { + + // search on non palindrome, find phrase with no slop, using exact phrase scorer + query.setSlop(0); // to use exact phrase scorer + query.add(new Term("field", "two")); + query.add(new Term("field", "three")); + Hits hits = searcher.search(query); + assertEquals("phrase found with exact phrase scorer", 1, hits.length()); + float score0 = hits.score(0); + //System.out.println("(exact) field: two three: "+score0); + QueryUtils.check(query,searcher); + + // search on non palindrome, find phrase with slop 2, though no slop required here. + query.setSlop(2); // to use sloppy scorer + hits = searcher.search(query); + assertEquals("just sloppy enough", 1, hits.length()); + float score1 = hits.score(0); + //System.out.println("(sloppy) field: two three: "+score1); + assertEquals("exact scorer and sloppy scorer score the same when slop does not matter",score0, score1, SCORE_COMP_THRESH); + QueryUtils.check(query,searcher); + + // search ordered in palindrome, find it twice + query = new PhraseQuery(); + query.setSlop(2); // must be at least two for both ordered and reversed to match + query.add(new Term("palindrome", "two")); + query.add(new Term("palindrome", "three")); + hits = searcher.search(query); + assertEquals("just sloppy enough", 1, hits.length()); + float score2 = hits.score(0); + //System.out.println("palindrome: two three: "+score2); + QueryUtils.check(query,searcher); + + assertTrue("ordered scores higher in palindrome",score1+SCORE_COMP_THRESHmaxDiff) { - throw new RuntimeException("ERROR matching docs:" - +"\n\tscorer.more=" + more + " doc="+sdoc[0] + " score="+scorerScore - +"\n\thitCollector.doc=" + doc + " score="+score - +"\n\t Scorer=" + scorer - +"\n\t Query=" + q - +"\n\t Searcher=" + s - ); + final int skip_op 
= 0; + final int next_op = 1; + final int orders [][] = { + {skip_op}, + {next_op}, + {skip_op, next_op}, + {next_op, skip_op}, + {skip_op, skip_op, next_op, next_op}, + {next_op, next_op, skip_op, skip_op}, + {skip_op, skip_op, skip_op, next_op, next_op}, + }; + for (int k = 0; k < orders.length; k++) { + final int order[] = orders[k]; + //System.out.print("Order:");for (int i = 0; i < order.length; i++) System.out.print(order[i]==skip_op ? " skip()":" next()"); System.out.println(); + final int opidx[] = {0}; + + final Weight w = q.weight(s); + final Scorer scorer = w.scorer(s.getIndexReader()); + + // FUTURE: ensure scorer.doc()==-1 + + final int[] sdoc = new int[] {-1}; + final float maxDiff = 1e-5f; + s.search(q,new HitCollector() { + public void collect(int doc, float score) { + try { + int op = order[(opidx[0]++)%order.length]; + //System.out.println(op==skip_op ? "skip("+(sdoc[0]+1)+")":"next()"); + boolean more = op==skip_op ? scorer.skipTo(sdoc[0]+1) : scorer.next(); + sdoc[0] = scorer.doc(); + float scorerScore = scorer.score(); + float scoreDiff = Math.abs(score-scorerScore); + if (more==false || doc != sdoc[0] || scoreDiff>maxDiff) { + StringBuffer sbord = new StringBuffer(); + for (int i = 0; i < order.length; i++) + sbord.append(order[i]==skip_op ? " skip()":" next()"); + throw new RuntimeException("ERROR matching docs:" + +"\n\tscorer.more=" + more + " doc="+sdoc[0] + " score="+scorerScore + +"\n\thitCollector.doc=" + doc + " score="+score + +"\n\t Scorer=" + scorer + +"\n\t Query=" + q + +"\n\t Searcher=" + s + +"\n\t Order=" + sbord + ); + } + } catch (IOException e) { + throw new RuntimeException(e); } - } catch (IOException e) { - throw new RuntimeException(e); } - } - }); - - // make sure next call to scorer is false. - TestCase.assertFalse((which[0]++&0x02)==0 ? scorer.skipTo(sdoc[0]+1) : scorer.next()); + }); + + // make sure next call to scorer is false. + int op = order[(opidx[0]++)%order.length]; + //System.out.println(op==skip_op ? 
"last: skip()":"last: next()"); + boolean more = op==skip_op ? scorer.skipTo(sdoc[0]+1) : scorer.next(); + TestCase.assertFalse(more); + } } }