Index: lucene/CHANGES.txt =================================================================== --- lucene/CHANGES.txt (revision 1437197) +++ lucene/CHANGES.txt (working copy) @@ -64,6 +64,10 @@ * LUCENE-4599: New oal.codecs.compressing.CompressingTermVectorsFormat which compresses term vectors into chunks of documents similarly to CompressingStoredFieldsFormat. (Adrien Grand) + +API Changes + +* LUCENE-4709: FacetResultNode no longer has a residue field. (Shai Erera) Bug Fixes Index: lucene/facet/src/java/org/apache/lucene/facet/search/AdaptiveFacetsAccumulator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/AdaptiveFacetsAccumulator.java (revision 1437197) +++ lucene/facet/src/java/org/apache/lucene/facet/search/AdaptiveFacetsAccumulator.java (working copy) @@ -5,7 +5,6 @@ import org.apache.lucene.facet.search.params.FacetSearchParams; import org.apache.lucene.facet.search.results.FacetResult; -import org.apache.lucene.facet.search.results.FacetResultNode; import org.apache.lucene.facet.search.sampling.RandomSampler; import org.apache.lucene.facet.search.sampling.Sampler; import org.apache.lucene.facet.search.sampling.SamplingAccumulator; @@ -37,8 +36,7 @@ *

* Note: Sampling accumulation (Accumulation over a sampled-set of the results), * does not guarantee accurate values for - * {@link FacetResult#getNumValidDescendants()} and - * {@link FacetResultNode#residue}. + * {@link FacetResult#getNumValidDescendants()}. * * @lucene.experimental */ Index: lucene/facet/src/java/org/apache/lucene/facet/search/CountingFacetsCollector.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/CountingFacetsCollector.java (revision 1437197) +++ lucene/facet/src/java/org/apache/lucene/facet/search/CountingFacetsCollector.java (working copy) @@ -262,7 +262,6 @@ } child = siblings[child]; } - root.residue = 0; root.subResults = nodes; res.add(new FacetResult(fr, root, nodes.size())); continue; @@ -273,17 +272,13 @@ FacetResultNode top = pq.top(); int child = children[rootOrd]; int numResults = 0; // count the number of results - int residue = 0; while (child != TaxonomyReader.INVALID_ORDINAL) { int count = counts[child]; if (count > top.value) { - residue += top.value; top.value = count; top.ordinal = child; top = pq.updateTop(); ++numResults; - } else { - residue += count; } child = siblings[child]; } @@ -300,7 +295,6 @@ node.label = taxoReader.getPath(node.ordinal); subResults[i] = node; } - root.residue = residue; root.subResults = Arrays.asList(subResults); res.add(new FacetResult(fr, root, size)); } Index: lucene/facet/src/java/org/apache/lucene/facet/search/SamplingWrapper.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/SamplingWrapper.java (revision 1437197) +++ lucene/facet/src/java/org/apache/lucene/facet/search/SamplingWrapper.java (working copy) @@ -6,7 +6,6 @@ import org.apache.lucene.facet.search.params.FacetSearchParams; import org.apache.lucene.facet.search.results.FacetResult; -import org.apache.lucene.facet.search.results.FacetResultNode; import org.apache.lucene.facet.search.sampling.Sampler; import org.apache.lucene.facet.search.sampling.Sampler.SampleResult; @@ -32,8 +31,7 @@ *

* Note: Sampling accumulation (Accumulation over a sampled-set of the results), * does not guarantee accurate values for - * {@link FacetResult#getNumValidDescendants()} and - * {@link FacetResultNode#residue}. + * {@link FacetResult#getNumValidDescendants()}. * * @lucene.experimental */ Index: lucene/facet/src/java/org/apache/lucene/facet/search/TopKFacetResultsHandler.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/TopKFacetResultsHandler.java (revision 1437197) +++ lucene/facet/src/java/org/apache/lucene/facet/search/TopKFacetResultsHandler.java (working copy) @@ -62,7 +62,6 @@ value = facetRequest.getValueOf(facetArrays, ordinal % partitionSize); } - // TODO (Facet): should initial value of "residue" depend on aggregator if not sum? FacetResultNode parentResultNode = new FacetResultNode(ordinal, value); Heap heap = ResultSortUtils.createSuitableHeap(facetRequest); @@ -97,11 +96,7 @@ } // bring sub results from heap of tmp res into result heap for (int i = tmpHeap.size(); i > 0; i--) { - - FacetResultNode a = heap.insertWithOverflow(tmpHeap.pop()); - if (a != null) { - resNode.residue += a.residue; - } + heap.insertWithOverflow(tmpHeap.pop()); } } @@ -177,14 +172,9 @@ reusable.value = value; reusable.subResults.clear(); reusable.label = null; - reusable.residue = 0; } ++childrenCounter; reusable = pq.insertWithOverflow(reusable); - if (reusable != null) { - // TODO (Facet): is other logic (not add) needed, per aggregator? - parentResultNode.residue += reusable.value; - } } } if (localDepth < depth) { Index: lucene/facet/src/java/org/apache/lucene/facet/search/TopKInEachNodeHandler.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/TopKInEachNodeHandler.java (revision 1437197) +++ lucene/facet/src/java/org/apache/lucene/facet/search/TopKInEachNodeHandler.java (working copy) @@ -266,9 +266,7 @@ tosOrdinal = siblings[tosOrdinal]; } // now it is inside. Run it and all its siblings inside the partition through a heap - // and in doing so, count them, find best K, and sum into residue - double residue = 0f; // the sum of all the siblings from this partition that do not make - // it to top K + // and in doing so, count them, find best K pq.clear(); //reusables are consumed as from a stack. The stack starts full and returns full. @@ -286,10 +284,6 @@ ac.value = value; ac = pq.insertWithOverflow(ac); if (null != ac) { - residue += ac.value; - // TODO (Facet): could it be that we need to do something - // else, not add, depending on the aggregator? - /* when a facet is excluded from top K, because already in this partition it has * K better siblings, it is only recursed for count only. */ @@ -320,7 +314,7 @@ // and add ords to sibling stack, and make a note in siblingExplored that these are to // be visited now if (ords.length > 0) { - AACOsOfOnePartition.put(ordinalStack[localDepth-1], new AACO(ords,vals,residue)); + AACOsOfOnePartition.put(ordinalStack[localDepth-1], new AACO(ords,vals)); bestSignlingsStack[localDepth] = ords; siblingExplored[localDepth] = ords.length-1; ordinalStack[localDepth] = ords[ords.length-1]; @@ -449,8 +443,7 @@ IntToObjectMap tmpToReturnMapToACCOs = tmpToReturn.mapToAACOs; IntToObjectMap tfrMapToACCOs = tfr.mapToAACOs; IntIterator tfrIntIterator = tfrMapToACCOs.keyIterator(); - //iterate over all ordinals in tfr that are maps to their children (and the residue over - // non included chilren) + //iterate over all ordinals in tfr that are maps to their children while (tfrIntIterator.hasNext()) { int tfrkey = tfrIntIterator.next(); AACO tmpToReturnAACO = null; @@ -467,7 +460,6 @@ } int[] resOrds = new int [resLength]; double[] resVals = new double [resLength]; - double resResidue = tmpToReturnAACO.residue + tfrAACO.residue; int indexIntoTmpToReturn = 0; int indexIntoTFR = 0; ACComparator merger = getSuitableACComparator(); // by facet Request @@ -504,15 +496,9 @@ // altogether yielding no more that best K kids for tfrkey, not to appear in the new shape of // tmpToReturn - while (indexIntoTmpToReturn < tmpToReturnAACO.ordinals.length) { - resResidue += tmpToReturnAACO.values[indexIntoTmpToReturn++]; - } - while (indexIntoTFR < tfrAACO.ordinals.length) { - resResidue += tfrAACO.values[indexIntoTFR++]; - } //update the list of best kids of tfrkey as appear in tmpToReturn - tmpToReturnMapToACCOs.put(tfrkey, new AACO(resOrds, resVals, resResidue)); - } // endof need to merge both AACO -- children and residue for same ordinal + tmpToReturnMapToACCOs.put(tfrkey, new AACO(resOrds, resVals)); + } // endof need to merge both AACO -- children for same ordinal } // endof loop over all ordinals in tfr } // endof loop over all temporary facet results to merge @@ -682,19 +668,15 @@ * potential nodes of the {@link FacetResult} tree * (i.e., the descendants of the root node, no deeper than the specified depth). * No more than K ( = {@link FacetRequest#getNumResults()}) - * siblings are enumerated, and - * residue holds the sum of values of the siblings rejected from the - * enumerated top K. + * siblings are enumerated. * @lucene.internal */ protected static final class AACO { int [] ordinals; // ordinals of the best K children, sorted from best to least double [] values; // the respective values for these children - double residue; // sum of values of all other children, that did not get into top K - AACO (int[] ords, double[] vals, double r) { + AACO (int[] ords, double[] vals) { this.ordinals = ords; this.values = vals; - this.residue = r; } } @@ -787,7 +769,6 @@ list.add(generateNode(aaco.ordinals[i], aaco.values[i], mapToAACOs)); } node.subResults = list; - node.residue = aaco.residue; return node; } Index: lucene/facet/src/java/org/apache/lucene/facet/search/results/FacetResultNode.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/results/FacetResultNode.java (revision 1437197) +++ lucene/facet/src/java/org/apache/lucene/facet/search/results/FacetResultNode.java (working copy) @@ -59,14 +59,6 @@ public double value; /** - * The total value of screened out sub results. If only part of the results - * were returned (usually because only the top-K categories are requested), - * then this provides information on "what else is there under this result - * node". - */ - public double residue; - - /** * The sub-results of this result. If {@link FacetRequest#getResultMode()} is * {@link ResultMode#PER_NODE_IN_TREE}, every sub result denotes an immediate * child of this node. Otherwise, it is a descendant of any level. @@ -100,9 +92,6 @@ sb.append(label.toString()); } sb.append(" (").append(Double.toString(value)).append(")"); - if (residue > 0) { - sb.append(" (residue=").append(residue).append(")"); - } for (FacetResultNode sub : subResults) { sb.append("\n").append(prefix).append(sub.toString(prefix + " ")); } Index: lucene/facet/src/java/org/apache/lucene/facet/search/sampling/Sampler.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/sampling/Sampler.java (revision 1437197) +++ lucene/facet/src/java/org/apache/lucene/facet/search/sampling/Sampler.java (working copy) @@ -39,8 +39,7 @@ *

* Note: Sampling accumulation (Accumulation over a sampled-set of the results), * does not guarantee accurate values for - * {@link FacetResult#getNumValidDescendants()} & - * {@link FacetResultNode#residue}. + * {@link FacetResult#getNumValidDescendants()}. * * @lucene.experimental */ @@ -187,17 +186,6 @@ trimmed.add(trimmedNode); } - /* - * If we are trimming, it means Sampling is in effect and the extra - * (over-sampled) results are being trimmed. Although the residue is not - * guaranteed to be accurate for Sampling, we try our best to fix it. - * The node's residue now will take under account the sub-nodes we're - * trimming. - */ - for (int i = size; i < node.subResults.size(); i++) { - node.residue += node.subResults.get(i).value; - } - node.subResults = trimmed; } Index: lucene/facet/src/java/org/apache/lucene/facet/search/sampling/SamplingAccumulator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/sampling/SamplingAccumulator.java (revision 1437197) +++ lucene/facet/src/java/org/apache/lucene/facet/search/sampling/SamplingAccumulator.java (working copy) @@ -12,7 +12,6 @@ import org.apache.lucene.facet.search.StandardFacetsAccumulator; import org.apache.lucene.facet.search.params.FacetSearchParams; import org.apache.lucene.facet.search.results.FacetResult; -import org.apache.lucene.facet.search.results.FacetResultNode; import org.apache.lucene.facet.search.sampling.Sampler.SampleResult; import org.apache.lucene.facet.taxonomy.TaxonomyReader; import org.apache.lucene.index.IndexReader; @@ -48,8 +47,7 @@ *

* Note: Sampling accumulation (Accumulation over a sampled-set of the results), * does not guarantee accurate values for - * {@link FacetResult#getNumValidDescendants()} & - * {@link FacetResultNode#residue}. + * {@link FacetResult#getNumValidDescendants()}. * * @see Sampler * @lucene.experimental Index: lucene/facet/src/test/org/apache/lucene/facet/search/CountingFacetsCollectorTest.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/search/CountingFacetsCollectorTest.java (revision 1437197) +++ lucene/facet/src/test/org/apache/lucene/facet/search/CountingFacetsCollectorTest.java (working copy) @@ -328,7 +328,6 @@ for (FacetResult res : facetResults) { FacetResultNode root = res.getFacetResultNode(); assertEquals("wrong count for " + root.label, termExpectedCounts.get(root.label), (int) root.value); - assertEquals("invalid residue", 0, (int) root.residue); for (FacetResultNode child : root.subResults) { assertEquals("wrong count for " + child.label, termExpectedCounts.get(child.label), (int) child.value); } @@ -338,38 +337,6 @@ } @Test - public void testResidue() throws Exception { - // test the collector's handling of residue - DirectoryReader indexReader = DirectoryReader.open(indexDir); - TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir); - IndexSearcher searcher = new IndexSearcher(indexReader); - - // asking for top 1 is the only way to guarantee there will be a residue - // provided that enough children were indexed (see below) - FacetSearchParams fsp = new FacetSearchParams(new CountFacetRequest(CP_A, 1), new CountFacetRequest(CP_B, 1)); - FacetsCollector fc = new CountingFacetsCollector(fsp , taxoReader); - TermQuery q = new TermQuery(A); - searcher.search(q, fc); - - List facetResults = fc.getFacetResults(); - assertEquals("invalid number of facet results", 2, facetResults.size()); - for (FacetResult res : facetResults) { - FacetResultNode root = res.getFacetResultNode(); - assertEquals("wrong count for " + root.label, termExpectedCounts.get(root.label), (int) root.value); - // make sure randomness didn't pick only one child of root (otherwise there's no residue) - int numChildrenIndexed = res.getFacetRequest().categoryPath == CP_A ? numChildrenIndexedA : numChildrenIndexedB; - if (numChildrenIndexed > 1) { - assertTrue("expected residue", root.residue > 0); - } - for (FacetResultNode child : root.subResults) { - assertEquals("wrong count for " + child.label, termExpectedCounts.get(child.label), (int) child.value); - } - } - - IOUtils.close(indexReader, taxoReader); - } - - @Test public void testAllCounts() throws Exception { DirectoryReader indexReader = DirectoryReader.open(indexDir); TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir); @@ -385,7 +352,6 @@ for (FacetResult res : facetResults) { FacetResultNode root = res.getFacetResultNode(); assertEquals("wrong count for " + root.label, allExpectedCounts.get(root.label), (int) root.value); - assertEquals("invalid residue", 0, (int) root.residue); for (FacetResultNode child : root.subResults) { assertEquals("wrong count for " + child.label, allExpectedCounts.get(child.label), (int) child.value); } @@ -410,7 +376,6 @@ for (FacetResult res : facetResults) { FacetResultNode root = res.getFacetResultNode(); assertEquals("wrong count for " + root.label, allExpectedCounts.get(root.label), (int) root.value); - assertEquals("invalid residue", 0, (int) root.residue); for (FacetResultNode child : root.subResults) { assertEquals("wrong count for " + child.label, allExpectedCounts.get(child.label), (int) child.value); } @@ -435,7 +400,6 @@ for (FacetResult res : facetResults) { FacetResultNode root = res.getFacetResultNode(); assertEquals("wrong count for " + root.label, allExpectedCounts.get(root.label), (int) root.value); - assertEquals("invalid residue", 0, (int) root.residue); for (FacetResultNode child : root.subResults) { assertEquals("wrong count for " + child.label, allExpectedCounts.get(child.label), (int) child.value); } @@ -501,7 +465,6 @@ for (FacetResult res : facetResults) { FacetResultNode root = res.getFacetResultNode(); assertEquals("wrong count for " + root.label, expCounts.get(root.label), (int) root.value); - assertEquals("invalid residue", 0, (int) root.residue); for (FacetResultNode child : root.subResults) { assertEquals("wrong count for " + child.label, expCounts.get(child.label), (int) child.value); } Index: lucene/facet/src/test/org/apache/lucene/facet/search/TestTopKInEachNodeResultHandler.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/search/TestTopKInEachNodeResultHandler.java (revision 1437197) +++ lucene/facet/src/test/org/apache/lucene/facet/search/TestTopKInEachNodeResultHandler.java (working copy) @@ -182,39 +182,34 @@ assertEquals(9, fr.getNumValidDescendants()); FacetResultNode parentRes = fr.getFacetResultNode(); assertEquals(16.0, parentRes.value, Double.MIN_VALUE); - assertEquals(2.0, parentRes.residue, Double.MIN_VALUE); assertEquals(2, parentRes.subResults.size()); // two nodes sorted by descending values: a/b with 8 and a/c with 6 - // a/b has residue 2 and two children a/b/2 with value 3, and a/b/1 with value 2. - // a/c has residue 0, and one child a/c/1 with value 1. - double [] expectedValues0 = { 8.0, 2.0, 3.0, 0.0, 2.0, 0.0, 6.0, 0.0, 1.0, 0.0 }; + // a/b has two children a/b/2 with value 3, and a/b/1 with value 2. + // a/c has one child a/c/1 with value 1. + double [] expectedValues0 = { 8.0, 3.0, 2.0, 6.0, 1.0 }; int i = 0; for (FacetResultNode node : parentRes.subResults) { assertEquals(expectedValues0[i++], node.value, Double.MIN_VALUE); - assertEquals(expectedValues0[i++], node.residue, Double.MIN_VALUE); for (FacetResultNode node2 : node.subResults) { assertEquals(expectedValues0[i++], node2.value, Double.MIN_VALUE); - assertEquals(expectedValues0[i++], node2.residue, Double.MIN_VALUE); } } // now just change the value of the first child of the root to 5, and then rearrange - // expected are: first a/c of value 6 and residue 0, and one child a/c/1 with value 1 - // then a/b with value 5 and residue 2, and both children: a/b/2 with value 3, and a/b/1 with value 2. + // expected are: first a/c of value 6, and one child a/c/1 with value 1 + // then a/b with value 5, and both children: a/b/2 with value 3, and a/b/1 with value 2. for (FacetResultNode node : parentRes.subResults) { node.value = 5.0; break; } // now rearrange - double [] expectedValues00 = { 6.0, 0.0, 1.0, 0.0, 5.0, 2.0, 3.0, 0.0, 2.0, 0.0 }; + double [] expectedValues00 = { 6.0, 1.0, 5.0, 3.0, 2.0 }; fr = cfra23.createFacetResultsHandler(tr).rearrangeFacetResult(fr); i = 0; for (FacetResultNode node : parentRes.subResults) { assertEquals(expectedValues00[i++], node.value, Double.MIN_VALUE); - assertEquals(expectedValues00[i++], node.residue, Double.MIN_VALUE); for (FacetResultNode node2 : node.subResults) { assertEquals(expectedValues00[i++], node2.value, Double.MIN_VALUE); - assertEquals(expectedValues00[i++], node2.residue, Double.MIN_VALUE); } } @@ -223,18 +218,15 @@ assertEquals(9, fr.getNumValidDescendants()); parentRes = fr.getFacetResultNode(); assertEquals(16.0, parentRes.value, Double.MIN_VALUE); - assertEquals(2.0, parentRes.residue, Double.MIN_VALUE); assertEquals(2, parentRes.subResults.size()); // two nodes sorted by descending values: a/b with 8 and a/c with 6 - // a/b has residue 2 and two children a/b/2 with value 3, and a/b/1 with value 2. - // a/c has residue 0, and one child a/c/1 with value 1. + // a/b has two children a/b/2 with value 3, and a/b/1 with value 2. + // a/c has one child a/c/1 with value 1. i = 0; for (FacetResultNode node : parentRes.subResults) { assertEquals(expectedValues0[i++], node.value, Double.MIN_VALUE); - assertEquals(expectedValues0[i++], node.residue, Double.MIN_VALUE); for (FacetResultNode node2 : node.subResults) { assertEquals(expectedValues0[i++], node2.value, Double.MIN_VALUE); - assertEquals(expectedValues0[i++], node2.residue, Double.MIN_VALUE); } } @@ -243,16 +235,13 @@ assertEquals(4, fr.getNumValidDescendants(), 4); parentRes = fr.getFacetResultNode(); assertEquals(16.0, parentRes.value, Double.MIN_VALUE); - assertEquals(2.0, parentRes.residue, Double.MIN_VALUE); assertEquals(2, parentRes.subResults.size()); // two nodes sorted by descending values: - // a/b with value 8 and residue 0 (because no children considered), - // and a/c with value 6 and residue 0 (because no children considered) - double [] expectedValues2 = { 8.0, 0.0, 6.0, 0.0 }; + // a/b with value 8 and a/c with value 6 + double [] expectedValues2 = { 8.0, 6.0, 0.0}; i = 0; for (FacetResultNode node : parentRes.subResults) { assertEquals(expectedValues2[i++], node.value, Double.MIN_VALUE); - assertEquals(expectedValues2[i++], node.residue, Double.MIN_VALUE); assertEquals(node.subResults.size(), 0); } @@ -261,13 +250,11 @@ assertEquals(4, fr.getNumValidDescendants()); parentRes = fr.getFacetResultNode(); assertEquals(8.0, parentRes.value, Double.MIN_VALUE); - assertEquals(2.0, parentRes.residue, Double.MIN_VALUE); assertEquals(2, parentRes.subResults.size()); double [] expectedValues3 = { 3.0, 2.0 }; i = 0; for (FacetResultNode node : parentRes.subResults) { assertEquals(expectedValues3[i++], node.value, Double.MIN_VALUE); - assertEquals(0.0, node.residue, Double.MIN_VALUE); assertEquals(0, node.subResults.size()); } @@ -276,12 +263,10 @@ assertEquals(4, fr.getNumValidDescendants()); parentRes = fr.getFacetResultNode(); assertEquals(8.0, parentRes.value, Double.MIN_VALUE); - assertEquals(2.0, parentRes.residue, Double.MIN_VALUE); assertEquals(2, parentRes.subResults.size()); i = 0; for (FacetResultNode node : parentRes.subResults) { assertEquals(expectedValues3[i++], node.value, Double.MIN_VALUE); - assertEquals(0.0, node.residue, Double.MIN_VALUE); assertEquals(0, node.subResults.size()); } @@ -290,12 +275,10 @@ assertEquals(4, fr.getNumValidDescendants()); parentRes = fr.getFacetResultNode(); assertEquals(8.0, parentRes.value, Double.MIN_VALUE); - assertEquals(2.0, parentRes.residue, Double.MIN_VALUE); assertEquals(2, parentRes.subResults.size()); i = 0; for (FacetResultNode node : parentRes.subResults) { assertEquals(expectedValues3[i++], node.value, Double.MIN_VALUE); - assertEquals(0.0, node.residue, Double.MIN_VALUE); assertEquals(0, node.subResults.size()); } @@ -304,7 +287,6 @@ assertEquals(0, fr.getNumValidDescendants()); // 0 descendants but rootnode parentRes = fr.getFacetResultNode(); assertEquals(8.0, parentRes.value, Double.MIN_VALUE); - assertEquals(0.0, parentRes.residue, Double.MIN_VALUE); assertEquals(0, parentRes.subResults.size()); hasDoctor |= "Doctor".equals(fr.getFacetRequest().categoryPath.components[0]);