Index: . =================================================================== --- . (revision 1201329) +++ . (working copy) Property changes on: . ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk:r1201329 Index: lucene =================================================================== --- lucene (revision 1201329) +++ lucene (working copy) Property changes on: lucene ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene:r1201329 Index: lucene/CHANGES.txt =================================================================== --- lucene/CHANGES.txt (revision 1201329) +++ lucene/CHANGES.txt (working copy) @@ -111,6 +111,11 @@ deprecated. IndexCommit.isOptimized was replaced with getSegmentCount. (Robert Muir, Mike McCandless) +* LUCENE-3205: Deprecated MultiTermQuery.getTotalNumerOfTerms() [and + related methods], as the numbers returned are not useful + for multi-segment indexes. They were only needed for tests of + NumericRangeQuery. (Mike McCandless, Uwe Schindler) + New Features * LUCENE-3448: Added FixedBitSet.and(other/DISI), andNot(other/DISI). Index: lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java =================================================================== --- lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java (revision 1201329) +++ lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java (working copy) @@ -130,9 +130,6 @@ final String printHitsField = getRunData().getConfig().get("print.hits.field", null); if (hits != null && printHitsField != null && printHitsField.length() > 0) { - if (q instanceof MultiTermQuery) { - System.out.println("MultiTermQuery term count = " + ((MultiTermQuery) q).getTotalNumberOfTerms()); - } System.out.println("totalHits = " + hits.totalHits); System.out.println("maxDoc() = " + reader.maxDoc()); System.out.println("numDocs() = " + reader.numDocs()); Index: lucene/src/java/org/apache/lucene/search/MultiTermQuery.java =================================================================== --- lucene/src/java/org/apache/lucene/search/MultiTermQuery.java (revision 1201329) +++ lucene/src/java/org/apache/lucene/search/MultiTermQuery.java (working copy) @@ -268,7 +268,9 @@ * non-constant-score mode. In constant-score mode you get the total number of * terms seeked for all segments / sub-readers. * @see #clearTotalNumberOfTerms + * @deprecated Don't use this method, as its not thread safe and useless. */ + @Deprecated public int getTotalNumberOfTerms() { return numberOfTerms; } @@ -277,11 +279,17 @@ * Expert: Resets the counting of unique terms. * Do this before executing the query/filter. * @see #getTotalNumberOfTerms + * @deprecated Don't use this method, as its not thread safe and useless. */ + @Deprecated public void clearTotalNumberOfTerms() { numberOfTerms = 0; } + /** + * @deprecated Don't use this method, as its not thread safe and useless. + */ + @Deprecated protected void incTotalNumberOfTerms(int inc) { numberOfTerms += inc; } Index: lucene/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java =================================================================== --- lucene/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java (revision 1201329) +++ lucene/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java (working copy) @@ -80,7 +80,9 @@ * search, be sure to first reset the term counter * with {@link #clearTotalNumberOfTerms}. * @see #clearTotalNumberOfTerms + * @deprecated Don't use this method, as its not thread safe and useless. */ + @Deprecated public int getTotalNumberOfTerms() { return query.getTotalNumberOfTerms(); } @@ -89,7 +91,9 @@ * Expert: Resets the counting of unique terms. * Do this before executing the filter. * @see #getTotalNumberOfTerms + * @deprecated Don't use this method, as its not thread safe and useless. */ + @Deprecated public void clearTotalNumberOfTerms() { query.clearTotalNumberOfTerms(); } Index: lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery32.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery32.java (revision 1201329) +++ lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery32.java (working copy) @@ -28,6 +28,7 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.NumericUtils; +import org.apache.lucene.util.StringHelper; import org.apache.lucene.util._TestUtil; import org.junit.Test; @@ -115,35 +116,27 @@ int lower=(distance*3/2)+startOffset, upper=lower + count*distance + (distance/3); NumericRangeQuery q = NumericRangeQuery.newIntRange(field, precisionStep, lower, upper, true, true); NumericRangeFilter f = NumericRangeFilter.newIntRange(field, precisionStep, lower, upper, true, true); - int lastTerms = 0; for (byte i=0; i<3; i++) { TopDocs topDocs; - int terms; String type; - q.clearTotalNumberOfTerms(); - f.clearTotalNumberOfTerms(); switch (i) { case 0: type = " (constant score filter rewrite)"; q.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE); topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER); - terms = q.getTotalNumberOfTerms(); break; case 1: type = " (constant score boolean rewrite)"; q.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE); topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER); - terms = q.getTotalNumberOfTerms(); break; case 2: type = " (filter)"; topDocs = searcher.search(new MatchAllDocsQuery(), f, noDocs, Sort.INDEXORDER); - terms = f.getTotalNumberOfTerms(); break; default: return; } - if (VERBOSE) System.out.println("Found "+terms+" distinct terms in range for field '"+field+"'"+type+"."); ScoreDoc[] sd = topDocs.scoreDocs; assertNotNull(sd); assertEquals("Score doc count"+type, count, sd.length ); @@ -151,12 +144,6 @@ assertEquals("First doc"+type, 2*distance+startOffset, Integer.parseInt(doc.get(field)) ); doc=searcher.doc(sd[sd.length-1].doc); assertEquals("Last doc"+type, (1+count)*distance+startOffset, Integer.parseInt(doc.get(field)) ); - if (i>0 && - (searcher.getIndexReader().getSequentialSubReaders() == null || - searcher.getIndexReader().getSequentialSubReaders().length == 1)) { - assertEquals("Distinct term number is equal for all query types", lastTerms, terms); - } - lastTerms = terms; } } @@ -203,7 +190,6 @@ int upper=(count-1)*distance + (distance/3) + startOffset; NumericRangeQuery q=NumericRangeQuery.newIntRange(field, precisionStep, null, upper, true, true); TopDocs topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER); - if (VERBOSE) System.out.println("Found "+q.getTotalNumberOfTerms()+" distinct terms in left open range for field '"+field+"'."); ScoreDoc[] sd = topDocs.scoreDocs; assertNotNull(sd); assertEquals("Score doc count", count, sd.length ); @@ -244,7 +230,6 @@ int lower=(count-1)*distance + (distance/3) +startOffset; NumericRangeQuery q=NumericRangeQuery.newIntRange(field, precisionStep, lower, null, true, true); TopDocs topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER); - if (VERBOSE) System.out.println("Found "+q.getTotalNumberOfTerms()+" distinct terms in right open range for field '"+field+"'."); ScoreDoc[] sd = topDocs.scoreDocs; assertNotNull(sd); assertEquals("Score doc count", noDocs-count, sd.length ); @@ -330,8 +315,8 @@ } private void testRandomTrieAndClassicRangeQuery(int precisionStep) throws Exception { - String field="field"+precisionStep; - int termCountT=0,termCountC=0; + String field=StringHelper.intern("field"+precisionStep); + int totalTermCountT=0,totalTermCountC=0,termCountT,termCountC; int num = _TestUtil.nextInt(random, 10, 20); for (int i = 0; i < num; i++) { int lower=(int)(random.nextDouble()*noDocs*distance)+startOffset; @@ -345,44 +330,77 @@ TopDocs tTopDocs = searcher.search(tq, 1); TopDocs cTopDocs = searcher.search(cq, 1); assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); - termCountT += tq.getTotalNumberOfTerms(); - termCountC += cq.getTotalNumberOfTerms(); + totalTermCountT += termCountT = countTerms(tq, field); + totalTermCountC += termCountC = countTerms(cq, field); + checkTermCounts(precisionStep, termCountT, termCountC); // test exclusive range tq=NumericRangeQuery.newIntRange(field, precisionStep, lower, upper, false, false); cq=new TermRangeQuery(field, NumericUtils.intToPrefixCoded(lower), NumericUtils.intToPrefixCoded(upper), false, false); tTopDocs = searcher.search(tq, 1); cTopDocs = searcher.search(cq, 1); assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); - termCountT += tq.getTotalNumberOfTerms(); - termCountC += cq.getTotalNumberOfTerms(); + totalTermCountT += termCountT = countTerms(tq, field); + totalTermCountC += termCountC = countTerms(cq, field); + checkTermCounts(precisionStep, termCountT, termCountC); // test left exclusive range tq=NumericRangeQuery.newIntRange(field, precisionStep, lower, upper, false, true); cq=new TermRangeQuery(field, NumericUtils.intToPrefixCoded(lower), NumericUtils.intToPrefixCoded(upper), false, true); tTopDocs = searcher.search(tq, 1); cTopDocs = searcher.search(cq, 1); assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); - termCountT += tq.getTotalNumberOfTerms(); - termCountC += cq.getTotalNumberOfTerms(); + totalTermCountT += termCountT = countTerms(tq, field); + totalTermCountC += termCountC = countTerms(cq, field); + checkTermCounts(precisionStep, termCountT, termCountC); // test right exclusive range tq=NumericRangeQuery.newIntRange(field, precisionStep, lower, upper, true, false); cq=new TermRangeQuery(field, NumericUtils.intToPrefixCoded(lower), NumericUtils.intToPrefixCoded(upper), true, false); tTopDocs = searcher.search(tq, 1); cTopDocs = searcher.search(cq, 1); assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); - termCountT += tq.getTotalNumberOfTerms(); - termCountC += cq.getTotalNumberOfTerms(); + totalTermCountT += termCountT = countTerms(tq, field); + totalTermCountC += termCountC = countTerms(cq, field); + checkTermCounts(precisionStep, termCountT, termCountC); } - if (precisionStep == Integer.MAX_VALUE && - (searcher.getIndexReader().getSequentialSubReaders() == null || - searcher.getIndexReader().getSequentialSubReaders().length == 1)) { - assertEquals("Total number of terms should be equal for unlimited precStep", termCountT, termCountC); - } else if (VERBOSE) { + + checkTermCounts(precisionStep, totalTermCountT, totalTermCountC); + if (VERBOSE && precisionStep != Integer.MAX_VALUE) { System.out.println("Average number of terms during random search on '" + field + "':"); - System.out.println(" Trie query: " + (((double)termCountT)/(num * 4))); - System.out.println(" Classical query: " + (((double)termCountC)/(num * 4))); + System.out.println(" Numeric query: " + (((double)totalTermCountT)/(num * 4))); + System.out.println(" Classical query: " + (((double)totalTermCountC)/(num * 4))); } } + private int countTerms(MultiTermQuery q, String field) throws Exception { + FilteredTermEnum termEnum = q.getEnum(reader); + try { + int count = 0; + Term last = null; + do { + final Term cur = termEnum.term(); + if (cur != null) { + count++; + assertSame(field, cur.field()); + if (last != null) { + assertTrue(last.text().compareTo(cur.text()) < 0); + } + last = cur; + } else break; + } while (termEnum.next()); + assertFalse(termEnum.next()); + return count; + } finally { + termEnum.close(); + } + } + + private void checkTermCounts(int precisionStep, int termCountT, int termCountC) { + if (precisionStep == Integer.MAX_VALUE) { + assertEquals("Number of terms should be equal for unlimited precStep", termCountC, termCountT); + } else { + assertTrue("Number of terms for NRQ should be <= compared to classical TRQ", termCountT <= termCountC); + } + } + @Test public void testRandomTrieAndClassicRangeQuery_8bit() throws Exception { testRandomTrieAndClassicRangeQuery(8); @@ -558,38 +576,4 @@ assertFalse(q2.equals(q1)); } - private void testEnum(int lower, int upper) throws Exception { - NumericRangeQuery q = NumericRangeQuery.newIntRange("field4", 4, lower, upper, true, true); - FilteredTermEnum termEnum = q.getEnum(searcher.getIndexReader()); - try { - int count = 0; - do { - final Term t = termEnum.term(); - if (t != null) { - final int val = NumericUtils.prefixCodedToInt(t.text()); - assertTrue("value not in bounds", val >= lower && val <= upper); - count++; - } else break; - } while (termEnum.next()); - assertFalse(termEnum.next()); - if (VERBOSE) System.out.println("TermEnum on 'field4' for range [" + lower + "," + upper + "] contained " + count + " terms."); - } finally { - termEnum.close(); - } - } - - @Test - public void testEnum() throws Exception { - int count=3000; - int lower=(distance*3/2)+startOffset, upper=lower + count*distance + (distance/3); - // test enum with values - testEnum(lower, upper); - // test empty enum - testEnum(upper, lower); - // test empty enum outside of bounds - lower = distance*noDocs+startOffset; - upper = 2 * lower; - testEnum(lower, upper); - } - } Index: lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java (revision 1201329) +++ lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java (working copy) @@ -23,10 +23,12 @@ import org.apache.lucene.document.NumericField; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.Term; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.NumericUtils; +import org.apache.lucene.util.StringHelper; import org.apache.lucene.util._TestUtil; import org.junit.Test; @@ -118,35 +120,27 @@ long lower=(distance*3/2)+startOffset, upper=lower + count*distance + (distance/3); NumericRangeQuery q = NumericRangeQuery.newLongRange(field, precisionStep, lower, upper, true, true); NumericRangeFilter f = NumericRangeFilter.newLongRange(field, precisionStep, lower, upper, true, true); - int lastTerms = 0; for (byte i=0; i<3; i++) { TopDocs topDocs; - int terms; String type; - q.clearTotalNumberOfTerms(); - f.clearTotalNumberOfTerms(); switch (i) { case 0: type = " (constant score filter rewrite)"; q.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE); topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER); - terms = q.getTotalNumberOfTerms(); break; case 1: type = " (constant score boolean rewrite)"; q.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE); topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER); - terms = q.getTotalNumberOfTerms(); break; case 2: type = " (filter)"; topDocs = searcher.search(new MatchAllDocsQuery(), f, noDocs, Sort.INDEXORDER); - terms = f.getTotalNumberOfTerms(); break; default: return; } - if (VERBOSE) System.out.println("Found "+terms+" distinct terms in range for field '"+field+"'"+type+"."); ScoreDoc[] sd = topDocs.scoreDocs; assertNotNull(sd); assertEquals("Score doc count"+type, count, sd.length ); @@ -154,12 +148,6 @@ assertEquals("First doc"+type, 2*distance+startOffset, Long.parseLong(doc.get(field)) ); doc=searcher.doc(sd[sd.length-1].doc); assertEquals("Last doc"+type, (1+count)*distance+startOffset, Long.parseLong(doc.get(field)) ); - if (i>0 && - (searcher.getIndexReader().getSequentialSubReaders() == null || - searcher.getIndexReader().getSequentialSubReaders().length == 1)) { - assertEquals("Distinct term number is equal for all query types", lastTerms, terms); - } - lastTerms = terms; } } @@ -211,7 +199,6 @@ long upper=(count-1)*distance + (distance/3) + startOffset; NumericRangeQuery q=NumericRangeQuery.newLongRange(field, precisionStep, null, upper, true, true); TopDocs topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER); - if (VERBOSE) System.out.println("Found "+q.getTotalNumberOfTerms()+" distinct terms in left open range for field '"+field+"'."); ScoreDoc[] sd = topDocs.scoreDocs; assertNotNull(sd); assertEquals("Score doc count", count, sd.length ); @@ -257,7 +244,6 @@ long lower=(count-1)*distance + (distance/3) +startOffset; NumericRangeQuery q=NumericRangeQuery.newLongRange(field, precisionStep, lower, null, true, true); TopDocs topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER); - if (VERBOSE) System.out.println("Found "+q.getTotalNumberOfTerms()+" distinct terms in right open range for field '"+field+"'."); ScoreDoc[] sd = topDocs.scoreDocs; assertNotNull(sd); assertEquals("Score doc count", noDocs-count, sd.length ); @@ -349,8 +335,8 @@ } private void testRandomTrieAndClassicRangeQuery(int precisionStep) throws Exception { - String field="field"+precisionStep; - int termCountT=0,termCountC=0; + final String field=StringHelper.intern("field"+precisionStep); + int totalTermCountT=0,totalTermCountC=0,termCountT,termCountC; int num = _TestUtil.nextInt(random, 10, 20); for (int i = 0; i < num; i++) { long lower=(long)(random.nextDouble()*noDocs*distance)+startOffset; @@ -364,44 +350,77 @@ TopDocs tTopDocs = searcher.search(tq, 1); TopDocs cTopDocs = searcher.search(cq, 1); assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); - termCountT += tq.getTotalNumberOfTerms(); - termCountC += cq.getTotalNumberOfTerms(); + totalTermCountT += termCountT = countTerms(tq, field); + totalTermCountC += termCountC = countTerms(cq, field); + checkTermCounts(precisionStep, termCountT, termCountC); // test exclusive range tq=NumericRangeQuery.newLongRange(field, precisionStep, lower, upper, false, false); cq=new TermRangeQuery(field, NumericUtils.longToPrefixCoded(lower), NumericUtils.longToPrefixCoded(upper), false, false); tTopDocs = searcher.search(tq, 1); cTopDocs = searcher.search(cq, 1); assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); - termCountT += tq.getTotalNumberOfTerms(); - termCountC += cq.getTotalNumberOfTerms(); + totalTermCountT += termCountT = countTerms(tq, field); + totalTermCountC += termCountC = countTerms(cq, field); + checkTermCounts(precisionStep, termCountT, termCountC); // test left exclusive range tq=NumericRangeQuery.newLongRange(field, precisionStep, lower, upper, false, true); cq=new TermRangeQuery(field, NumericUtils.longToPrefixCoded(lower), NumericUtils.longToPrefixCoded(upper), false, true); tTopDocs = searcher.search(tq, 1); cTopDocs = searcher.search(cq, 1); assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); - termCountT += tq.getTotalNumberOfTerms(); - termCountC += cq.getTotalNumberOfTerms(); + totalTermCountT += termCountT = countTerms(tq, field); + totalTermCountC += termCountC = countTerms(cq, field); + checkTermCounts(precisionStep, termCountT, termCountC); // test right exclusive range tq=NumericRangeQuery.newLongRange(field, precisionStep, lower, upper, true, false); cq=new TermRangeQuery(field, NumericUtils.longToPrefixCoded(lower), NumericUtils.longToPrefixCoded(upper), true, false); tTopDocs = searcher.search(tq, 1); cTopDocs = searcher.search(cq, 1); assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); - termCountT += tq.getTotalNumberOfTerms(); - termCountC += cq.getTotalNumberOfTerms(); + totalTermCountT += termCountT = countTerms(tq, field); + totalTermCountC += termCountC = countTerms(cq, field); + checkTermCounts(precisionStep, termCountT, termCountC); } - if (precisionStep == Integer.MAX_VALUE && - (searcher.getIndexReader().getSequentialSubReaders() == null || - searcher.getIndexReader().getSequentialSubReaders().length == 1)) { - assertEquals("Total number of terms should be equal for unlimited precStep", termCountT, termCountC); - } else if (VERBOSE) { + + checkTermCounts(precisionStep, totalTermCountT, totalTermCountC); + if (VERBOSE && precisionStep != Integer.MAX_VALUE) { System.out.println("Average number of terms during random search on '" + field + "':"); - System.out.println(" Trie query: " + (((double)termCountT)/(num * 4))); - System.out.println(" Classical query: " + (((double)termCountC)/(num * 4))); + System.out.println(" Numeric query: " + (((double)totalTermCountT)/(num * 4))); + System.out.println(" Classical query: " + (((double)totalTermCountC)/(num * 4))); } } + private int countTerms(MultiTermQuery q, String field) throws Exception { + FilteredTermEnum termEnum = q.getEnum(reader); + try { + int count = 0; + Term last = null; + do { + final Term cur = termEnum.term(); + if (cur != null) { + count++; + assertSame(field, cur.field()); + if (last != null) { + assertTrue(last.text().compareTo(cur.text()) < 0); + } + last = cur; + } else break; + } while (termEnum.next()); + assertFalse(termEnum.next()); + return count; + } finally { + termEnum.close(); + } + } + + private void checkTermCounts(int precisionStep, int termCountT, int termCountC) { + if (precisionStep == Integer.MAX_VALUE) { + assertEquals("Number of terms should be equal for unlimited precStep", termCountC, termCountT); + } else { + assertTrue("Number of terms for NRQ should be <= compared to classical TRQ", termCountT <= termCountC); + } + } + @Test public void testRandomTrieAndClassicRangeQuery_8bit() throws Exception { testRandomTrieAndClassicRangeQuery(8);