Index: lucene/CHANGES.txt =================================================================== --- lucene/CHANGES.txt (revision 1487569) +++ lucene/CHANGES.txt (working copy) @@ -118,6 +118,11 @@ for scoringQueries. Instead use QueryValueSource to safely wrap arbitrary queries and use them with CustomScoreQuery. (John Wang, Robert Muir) +* LUCENE-5016: SamplingAccumulator returned inconsistent label if asked to + aggregate a non-existing category. Also fixed a bug in RangeAccumulator if + some readers did not have the requested numeric DV field. + (Rob Audenaerde, Shai Erera) + Optimizations * LUCENE-4936: Improve numeric doc values compression in case all values share Index: lucene/facet/src/java/org/apache/lucene/facet/range/RangeAccumulator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/range/RangeAccumulator.java (revision 1487569) +++ lucene/facet/src/java/org/apache/lucene/facet/range/RangeAccumulator.java (working copy) @@ -64,7 +64,7 @@ throw new IllegalArgumentException("only flat (dimension only) CategoryPath is allowed"); } - RangeFacetRequest rfr = (RangeFacetRequest) fr; + RangeFacetRequest rfr = (RangeFacetRequest) fr; requests.add(new RangeSet(rfr.ranges, rfr.categoryPath.components[0])); } @@ -86,8 +86,11 @@ RangeSet ranges = requests.get(i); int[] counts = new int[ranges.ranges.length]; - for(MatchingDocs hits : matchingDocs) { + for (MatchingDocs hits : matchingDocs) { NumericDocValues ndv = hits.context.reader().getNumericDocValues(ranges.field); + if (ndv == null) { + continue; // no numeric values for this field in this reader + } final int length = hits.bits.length(); int doc = 0; while (doc < length && (doc = hits.bits.nextSetBit(doc)) != -1) { Index: lucene/facet/src/java/org/apache/lucene/facet/sampling/Sampler.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/sampling/Sampler.java (revision 1487569) +++ lucene/facet/src/java/org/apache/lucene/facet/sampling/Sampler.java (working copy) @@ -209,7 +209,7 @@ super(orig.categoryPath, num); this.orig = orig; setDepth(orig.getDepth()); - setNumLabel(orig.getNumLabel()); + setNumLabel(0); // don't label anything as we're over-sampling setResultMode(orig.getResultMode()); setSortOrder(orig.getSortOrder()); } Index: lucene/facet/src/java/org/apache/lucene/facet/sampling/SamplingAccumulator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/sampling/SamplingAccumulator.java (revision 1487569) +++ lucene/facet/src/java/org/apache/lucene/facet/sampling/SamplingAccumulator.java (working copy) @@ -87,7 +87,7 @@ List sampleRes = super.accumulate(docids); - List fixedRes = new ArrayList(); + List results = new ArrayList(); for (FacetResult fres : sampleRes) { // for sure fres is not null because this is guaranteed by the delegee. PartitionsFacetResultsHandler frh = createFacetResultsHandler(fres.getFacetRequest()); @@ -104,13 +104,18 @@ } // final labeling if allowed (because labeling is a costly operation) - frh.labelResult(fres); - fixedRes.add(fres); // add to final results + if (fres.getFacetResultNode().ordinal == TaxonomyReader.INVALID_ORDINAL) { + // category does not exist, add an empty result + results.add(emptyResult(fres.getFacetResultNode().ordinal, fres.getFacetRequest())); + } else { + frh.labelResult(fres); + results.add(fres); + } } searchParams = original; // Back to original params - return fixedRes; + return results; } @Override Index: lucene/facet/src/java/org/apache/lucene/facet/sampling/SamplingWrapper.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/sampling/SamplingWrapper.java (revision 1487569) +++ lucene/facet/src/java/org/apache/lucene/facet/sampling/SamplingWrapper.java (working copy) @@ -10,6 +10,7 @@ import org.apache.lucene.facet.search.FacetResult; import org.apache.lucene.facet.search.ScoredDocIDs; import org.apache.lucene.facet.search.StandardFacetsAccumulator; +import org.apache.lucene.facet.taxonomy.TaxonomyReader; /* * Licensed to the Apache Software Foundation (ASF) under one or more @@ -62,7 +63,7 @@ List sampleRes = delegee.accumulate(sampleSet.docids); - List fixedRes = new ArrayList(); + List results = new ArrayList(); SampleFixer sampleFixer = sampler.samplingParams.getSampleFixer(); for (FacetResult fres : sampleRes) { @@ -80,15 +81,20 @@ } // final labeling if allowed (because labeling is a costly operation) - frh.labelResult(fres); - fixedRes.add(fres); // add to final results + if (fres.getFacetResultNode().ordinal == TaxonomyReader.INVALID_ORDINAL) { + // category does not exist, add an empty result + results.add(emptyResult(fres.getFacetResultNode().ordinal, fres.getFacetRequest())); + } else { + frh.labelResult(fres); + results.add(fres); + } } if (shouldOversample) { delegee.searchParams = original; // Back to original params } - return fixedRes; + return results; } @Override Index: lucene/facet/src/test/org/apache/lucene/facet/search/TestFacetsCollector.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/search/TestFacetsCollector.java (revision 1487569) +++ lucene/facet/src/test/org/apache/lucene/facet/search/TestFacetsCollector.java (working copy) @@ -17,8 +17,20 @@ import org.apache.lucene.facet.params.FacetIndexingParams; import org.apache.lucene.facet.params.FacetSearchParams; import org.apache.lucene.facet.params.PerDimensionIndexingParams; +import org.apache.lucene.facet.range.LongRange; +import org.apache.lucene.facet.range.RangeAccumulator; +import org.apache.lucene.facet.range.RangeFacetRequest; +import org.apache.lucene.facet.sampling.RandomSampler; +import org.apache.lucene.facet.sampling.Sampler; +import org.apache.lucene.facet.sampling.SamplingAccumulator; +import org.apache.lucene.facet.sampling.SamplingParams; +import org.apache.lucene.facet.sampling.SamplingWrapper; +import org.apache.lucene.facet.sampling.TakmiSampleFixer; import org.apache.lucene.facet.search.FacetRequest.ResultMode; +import org.apache.lucene.facet.sortedset.SortedSetDocValuesAccumulator; +import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState; import org.apache.lucene.facet.taxonomy.CategoryPath; +import org.apache.lucene.facet.taxonomy.TaxonomyReader; import org.apache.lucene.facet.taxonomy.TaxonomyWriter; import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader; import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter; @@ -384,5 +396,72 @@ IOUtils.close(taxo, taxoDir, r, indexDir); } - + + @Test + public void testLabeling() throws Exception { + Directory indexDir = newDirectory(), taxoDir = newDirectory(); + + // create the index + IndexWriter indexWriter = new IndexWriter(indexDir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))); + DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir); + FacetFields facetFields = new FacetFields(taxoWriter); + Document doc = new Document(); + facetFields.addFields(doc, Arrays.asList(new CategoryPath("A/1", '/'))); + indexWriter.addDocument(doc); + IOUtils.close(indexWriter, taxoWriter); + + DirectoryReader indexReader = DirectoryReader.open(indexDir); + TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir); + IndexSearcher searcher = new IndexSearcher(indexReader); + // ask to count a non-existing category to test labeling + FacetSearchParams fsp = new FacetSearchParams(new CountFacetRequest(new CategoryPath("B"), 5)); + + final SamplingParams sampleParams = new SamplingParams(); + sampleParams.setMaxSampleSize(100); + sampleParams.setMinSampleSize(100); + sampleParams.setSamplingThreshold(100); + sampleParams.setOversampleFactor(1.0d); + if (random().nextBoolean()) { + sampleParams.setSampleFixer(new TakmiSampleFixer(indexReader, taxoReader, fsp)); + } + final Sampler sampler = new RandomSampler(sampleParams, random()); + + FacetsAccumulator[] accumulators = new FacetsAccumulator[] { + new FacetsAccumulator(fsp, indexReader, taxoReader), + new StandardFacetsAccumulator(fsp, indexReader, taxoReader), + new SamplingAccumulator(sampler, fsp, indexReader, taxoReader), + new AdaptiveFacetsAccumulator(fsp, indexReader, taxoReader), + new SamplingWrapper(new StandardFacetsAccumulator(fsp, indexReader, taxoReader), sampler) + }; + + for (FacetsAccumulator fa : accumulators) { + FacetsCollector fc = FacetsCollector.create(fa); + searcher.search(new MatchAllDocsQuery(), fc); + List facetResults = fc.getFacetResults(); + assertNotNull(facetResults); + assertEquals("incorrect label returned for " + fa, fsp.facetRequests.get(0).categoryPath, facetResults.get(0).getFacetResultNode().label); + } + + try { + // SortedSetDocValuesAccumulator cannot even be created in such state + assertNull(new SortedSetDocValuesAccumulator(fsp, new SortedSetDocValuesReaderState(indexReader))); + // if this ever changes, make sure FacetResultNode is labeled correctly + fail("should not have succeeded to execute a request over a category which wasn't indexed as SortedSetDVField"); + } catch (IllegalArgumentException e) { + // expected + } + + fsp = new FacetSearchParams(new RangeFacetRequest("f", new LongRange("grr", 0, true, 1, true))); + RangeAccumulator ra = new RangeAccumulator(fsp, indexReader); + FacetsCollector fc = FacetsCollector.create(ra); + searcher.search(new MatchAllDocsQuery(), fc); + List facetResults = fc.getFacetResults(); + assertNotNull(facetResults); + assertEquals("incorrect label returned for RangeAccumulator", fsp.facetRequests.get(0).categoryPath, facetResults.get(0).getFacetResultNode().label); + + IOUtils.close(indexReader, taxoReader); + + IOUtils.close(indexDir, taxoDir); + } + }