Index: lucene/CHANGES.txt =================================================================== --- lucene/CHANGES.txt (revision 1364076) +++ lucene/CHANGES.txt (working copy) @@ -93,6 +93,10 @@ all queries. Made Scorer.freq() abstract. (Koji Sekiguchi, Mike McCandless, Robert Muir) +* LUCENE-4242: When creating uninverted cache, normalize the TermEnum.docFreq() + so that terms are not excluded when there is a high ratio of deleted docs + in the index. (Roman Chyla via Mike McCandless) + Build * LUCENE-4094: Support overriding file.encoding on forked test JVMs Index: lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java (revision 1364076) +++ lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java (working copy) @@ -296,6 +296,15 @@ int termNum = 0; docsEnum = null; + + float liveDocsRatio; + if (maxDoc == 0) { + // Paranoia: + liveDocsRatio = 1.0f; + } else { + liveDocsRatio = ((float) reader.numDocs()) / maxDoc; + } + // Loop begins with te positioned to first term (we call // seek above): for (;;) { @@ -330,10 +339,10 @@ // and 2) use FST not array/PagedBytes indexedTerms.add(indexedTerm); } + + // Pro-rate the term's docFreq by percentage of live docs: + if (te.docFreq() * liveDocsRatio <= maxTermDocFreq) { - final int df = te.docFreq(); - if (df <= maxTermDocFreq) { - docsEnum = te.docs(liveDocs, docsEnum, false); // dF, but takes deletions into account Index: solr/core/src/test/org/apache/solr/TestRandomFaceting.java =================================================================== --- solr/core/src/test/org/apache/solr/TestRandomFaceting.java (revision 1364076) +++ solr/core/src/test/org/apache/solr/TestRandomFaceting.java (working copy) @@ -207,6 +207,7 @@ for (String method : methods) { // params.add("facet.field", "{!key="+method+"}" + ftype.fname); // TODO: allow method to be passed on local params? + System.out.println("METHOD=" + method); params.set("facet.method", method);