Index: lucene/CHANGES.txt =================================================================== --- lucene/CHANGES.txt (revision 1526306) +++ lucene/CHANGES.txt (working copy) @@ -89,6 +89,11 @@ its state, which could result in exceptions being thrown, as well as incorrect ordinals returned from getParent. (Shai Erera) +* LUCENE-5245: Fix MultiTermQuery's constant score rewrites to always + return a ConstantScoreQuery to make scoring consistent. Previously it + returned an empty unwrapped BooleanQuery, if no terms were available, + which has a different query norm. (Nik Everett, Uwe Schindler) + API Changes: * LUCENE-5222: Add SortField.needsScores(). Previously it was not possible Index: lucene/core/src/java/org/apache/lucene/search/ConstantScoreAutoRewrite.java =================================================================== --- lucene/core/src/java/org/apache/lucene/search/ConstantScoreAutoRewrite.java (revision 1526306) +++ lucene/core/src/java/org/apache/lucene/search/ConstantScoreAutoRewrite.java (working copy) @@ -96,17 +96,17 @@ final int size = col.pendingTerms.size(); if (col.hasCutOff) { return MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE.rewrite(reader, query); - } else if (size == 0) { - return getTopLevelQuery(); } else { final BooleanQuery bq = getTopLevelQuery(); - final BytesRefHash pendingTerms = col.pendingTerms; - final int sort[] = pendingTerms.sort(BytesRef.getUTF8SortedAsUnicodeComparator()); - for(int i = 0; i < size; i++) { - final int pos = sort[i]; - // docFreq is not used for constant score here, we pass 1 - // to explicitely set a fake value, so it's not calculated - addClause(bq, new Term(query.field, pendingTerms.get(pos, new BytesRef())), 1, 1.0f, col.array.termState[pos]); + if (size > 0) { + final BytesRefHash pendingTerms = col.pendingTerms; + final int sort[] = pendingTerms.sort(BytesRef.getUTF8SortedAsUnicodeComparator()); + for(int i = 0; i < size; i++) { + final int pos = sort[i]; + // docFreq is not used for constant score 
here, we pass 1 + // to explicitly set a fake value, so it's not calculated + addClause(bq, new Term(query.field, pendingTerms.get(pos, new BytesRef())), 1, 1.0f, col.array.termState[pos]); + } } // Strip scores final Query result = new ConstantScoreQuery(bq); Index: lucene/core/src/java/org/apache/lucene/search/ScoringRewrite.java =================================================================== --- lucene/core/src/java/org/apache/lucene/search/ScoringRewrite.java (revision 1526306) +++ lucene/core/src/java/org/apache/lucene/search/ScoringRewrite.java (working copy) @@ -87,9 +87,6 @@ @Override public Query rewrite(IndexReader reader, MultiTermQuery query) throws IOException { final BooleanQuery bq = SCORING_BOOLEAN_QUERY_REWRITE.rewrite(reader, query); - // TODO: if empty boolean query return NullQuery? - if (bq.clauses().isEmpty()) - return bq; // strip the scores off final Query result = new ConstantScoreQuery(bq); result.setBoost(query.getBoost()); Index: lucene/core/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java =================================================================== --- lucene/core/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java (revision 1526306) +++ lucene/core/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java (working copy) @@ -160,9 +160,63 @@ result[i].score, SCORE_COMP_THRESH); } + result = search.search(csrq("data", "1", "6", T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, 1000).scoreDocs; + numHits = result.length; + assertEquals("wrong number of results", 6, numHits); + for (int i = 0; i < numHits; i++) { + assertEquals("score for " + i + " was not the same", score, + result[i].score, SCORE_COMP_THRESH); + } } @Test + public void testEqualScoresWhenNoHits() throws IOException { + // NOTE: uses index build in *this* setUp + + IndexSearcher search = newSearcher(reader); + + ScoreDoc[] result; + + // the range clause matches no terms; the single hit's score should be the same for every rewrite method + 
TermQuery dummyTerm = new TermQuery(new Term("data", "1")); + + BooleanQuery bq = new BooleanQuery(); + bq.add(dummyTerm, BooleanClause.Occur.SHOULD); // hits one doc + bq.add(csrq("data", "#", "#", T, T), BooleanClause.Occur.SHOULD); // hits no docs + result = search.search(bq, null, 1000).scoreDocs; + int numHits = result.length; + assertEquals("wrong number of results", 1, numHits); + float score = result[0].score; + for (int i = 1; i < numHits; i++) { + assertEquals("score for " + i + " was not the same", score, + result[i].score, SCORE_COMP_THRESH); + } + + bq = new BooleanQuery(); + bq.add(dummyTerm, BooleanClause.Occur.SHOULD); // hits one doc + bq.add(csrq("data", "#", "#", T, T, MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE), BooleanClause.Occur.SHOULD); // hits no docs + result = search.search(bq, null, 1000).scoreDocs; + numHits = result.length; + assertEquals("wrong number of results", 1, numHits); + for (int i = 0; i < numHits; i++) { + assertEquals("score for " + i + " was not the same", score, + result[i].score, SCORE_COMP_THRESH); + } + + bq = new BooleanQuery(); + bq.add(dummyTerm, BooleanClause.Occur.SHOULD); // hits one doc + bq.add(csrq("data", "#", "#", T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), BooleanClause.Occur.SHOULD); // hits no docs + result = search.search(bq, null, 1000).scoreDocs; + numHits = result.length; + assertEquals("wrong number of results", 1, numHits); + for (int i = 0; i < numHits; i++) { + assertEquals("score for " + i + " was not the same", score, + result[i].score, SCORE_COMP_THRESH); + } + } + + @Test public void testBoost() throws IOException { // NOTE: uses index build in *this* setUp