Index: lucene/facet/src/java/org/apache/lucene/facet/complements/ComplementCountingAggregator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/complements/ComplementCountingAggregator.java (revision 1508077) +++ lucene/facet/src/java/org/apache/lucene/facet/complements/ComplementCountingAggregator.java (working copy) @@ -1,45 +0,0 @@ -package org.apache.lucene.facet.complements; - -import java.io.IOException; - -import org.apache.lucene.facet.search.CountingAggregator; -import org.apache.lucene.util.IntsRef; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * A {@link CountingAggregator} used during complement counting. 
- * - * @lucene.experimental - */ -public class ComplementCountingAggregator extends CountingAggregator { - - public ComplementCountingAggregator(int[] counterArray) { - super(counterArray); - } - - @Override - public void aggregate(int docID, float score, IntsRef ordinals) throws IOException { - for (int i = 0; i < ordinals.length; i++) { - int ord = ordinals.ints[i]; - assert counterArray[ord] != 0 : "complement aggregation: count is about to become negative for ordinal " + ord; - --counterArray[ord]; - } - } - -} Index: lucene/facet/src/java/org/apache/lucene/facet/complements/TotalFacetCounts.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/complements/TotalFacetCounts.java (revision 1508077) +++ lucene/facet/src/java/org/apache/lucene/facet/complements/TotalFacetCounts.java (working copy) @@ -11,20 +11,20 @@ import java.util.HashMap; import java.util.concurrent.atomic.AtomicInteger; +import org.apache.lucene.facet.old.Aggregator; +import org.apache.lucene.facet.old.CountingAggregator; +import org.apache.lucene.facet.old.OldFacetsAccumulator; +import org.apache.lucene.facet.old.ScoredDocIdsUtils; import org.apache.lucene.facet.params.CategoryListParams; import org.apache.lucene.facet.params.FacetIndexingParams; import org.apache.lucene.facet.params.FacetSearchParams; -import org.apache.lucene.facet.search.Aggregator; import org.apache.lucene.facet.search.CategoryListIterator; import org.apache.lucene.facet.search.CountFacetRequest; -import org.apache.lucene.facet.search.CountingAggregator; import org.apache.lucene.facet.search.FacetArrays; import org.apache.lucene.facet.search.FacetRequest; -import org.apache.lucene.facet.search.StandardFacetsAccumulator; import org.apache.lucene.facet.taxonomy.CategoryPath; import org.apache.lucene.facet.taxonomy.TaxonomyReader; import org.apache.lucene.facet.util.PartitionsUtils; -import org.apache.lucene.facet.util.ScoredDocIdsUtils; import 
org.apache.lucene.index.IndexReader; /* @@ -159,7 +159,7 @@ final int[][] counts = new int[(int) Math.ceil(taxonomy.getSize() /(float) partitionSize)][partitionSize]; FacetSearchParams newSearchParams = new FacetSearchParams(facetIndexingParams, DUMMY_REQ); //createAllListsSearchParams(facetIndexingParams, this.totalCounts); - StandardFacetsAccumulator sfa = new StandardFacetsAccumulator(newSearchParams, indexReader, taxonomy) { + OldFacetsAccumulator sfa = new OldFacetsAccumulator(newSearchParams, indexReader, taxonomy) { @Override protected HashMap getCategoryListMap( FacetArrays facetArrays, int partition) throws IOException { @@ -172,7 +172,7 @@ return map; } }; - sfa.setComplementThreshold(StandardFacetsAccumulator.DISABLE_COMPLEMENT); + sfa.setComplementThreshold(OldFacetsAccumulator.DISABLE_COMPLEMENT); sfa.accumulate(ScoredDocIdsUtils.createAllDocsScoredDocIDs(indexReader)); return new TotalFacetCounts(taxonomy, facetIndexingParams, counts, CreationType.Computed); } Index: lucene/facet/src/java/org/apache/lucene/facet/old/AdaptiveFacetsAccumulator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/old/AdaptiveFacetsAccumulator.java (revision 0) +++ lucene/facet/src/java/org/apache/lucene/facet/old/AdaptiveFacetsAccumulator.java (working copy) @@ -0,0 +1,116 @@ +package org.apache.lucene.facet.old; + +import java.io.IOException; +import java.util.List; + +import org.apache.lucene.facet.params.FacetSearchParams; +import org.apache.lucene.facet.sampling.RandomSampler; +import org.apache.lucene.facet.sampling.Sampler; +import org.apache.lucene.facet.sampling.SamplingAccumulator; +import org.apache.lucene.facet.search.FacetArrays; +import org.apache.lucene.facet.search.FacetResult; +import org.apache.lucene.facet.search.FacetsAccumulator; +import org.apache.lucene.facet.taxonomy.TaxonomyReader; +import org.apache.lucene.index.IndexReader; + +/* + * Licensed to the Apache Software Foundation 
(ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * {@link FacetsAccumulator} whose behavior regarding complements, sampling, + * etc. is not set up front but rather is determined at accumulation time + * according to the statistics of the accumulated set of documents and the + * index. + *

+ * Note: Sampling accumulation (Accumulation over a sampled-set of the results), + * does not guarantee accurate values for + * {@link FacetResult#getNumValidDescendants()}. + * + * @lucene.experimental + */ +public final class AdaptiveFacetsAccumulator extends OldFacetsAccumulator { + + private Sampler sampler = new RandomSampler(); + + /** + * Create an {@link AdaptiveFacetsAccumulator} + * @see OldFacetsAccumulator#OldFacetsAccumulator(FacetSearchParams, IndexReader, TaxonomyReader) + */ + public AdaptiveFacetsAccumulator(FacetSearchParams searchParams, IndexReader indexReader, + TaxonomyReader taxonomyReader) { + super(searchParams, indexReader, taxonomyReader); + } + + /** + * Create an {@link AdaptiveFacetsAccumulator} + * + * @see OldFacetsAccumulator#OldFacetsAccumulator(FacetSearchParams, + * IndexReader, TaxonomyReader, FacetArrays) + */ + public AdaptiveFacetsAccumulator(FacetSearchParams searchParams, IndexReader indexReader, + TaxonomyReader taxonomyReader, FacetArrays facetArrays) { + super(searchParams, indexReader, taxonomyReader, facetArrays); + } + + /** + * Set the sampler. + * @param sampler sampler to set + */ + public void setSampler(Sampler sampler) { + this.sampler = sampler; + } + + @Override + public List accumulate(ScoredDocIDs docids) throws IOException { + OldFacetsAccumulator delegee = appropriateFacetCountingAccumulator(docids); + + if (delegee == this) { + return super.accumulate(docids); + } + + return delegee.accumulate(docids); + } + + /** + * Compute the appropriate facet accumulator to use. + * If no special/clever adaptation is possible/needed return this (self). + */ + private OldFacetsAccumulator appropriateFacetCountingAccumulator(ScoredDocIDs docids) { + // Verify that searchParams permit sampling/complement/etc... otherwise do default + if (!mayComplement()) { + return this; + } + + // Now we're sure we can use the sampling methods as we're in a counting only mode + + // Verify that sampling is enabled and required ... 
otherwise do default + if (sampler == null || !sampler.shouldSample(docids)) { + return this; + } + + SamplingAccumulator samplingAccumulator = new SamplingAccumulator(sampler, searchParams, indexReader, taxonomyReader); + samplingAccumulator.setComplementThreshold(getComplementThreshold()); + return samplingAccumulator; + } + + /** + * @return the sampler in effect + */ + public final Sampler getSampler() { + return sampler; + } +} \ No newline at end of file Property changes on: lucene/facet/src/java/org/apache/lucene/facet/old/AdaptiveFacetsAccumulator.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/facet/src/java/org/apache/lucene/facet/old/Aggregator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/old/Aggregator.java (revision 0) +++ lucene/facet/src/java/org/apache/lucene/facet/old/Aggregator.java (working copy) @@ -0,0 +1,48 @@ +package org.apache.lucene.facet.old; + +import java.io.IOException; + +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.util.IntsRef; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Aggregates the categories of documents given to + * {@link #aggregate(int, float, IntsRef)}. Note that the document IDs are local + * to the reader given to {@link #setNextReader(AtomicReaderContext)}. + * + * @lucene.experimental + */ +public interface Aggregator { + + /** + * Sets the {@link AtomicReaderContext} for which + * {@link #aggregate(int, float, IntsRef)} calls will be made. If this method + * returns false, {@link #aggregate(int, float, IntsRef)} should not be called + * for this reader. + */ + public boolean setNextReader(AtomicReaderContext context) throws IOException; + + /** + * Aggregate the ordinals of the given document ID (and its score). The given + * ordinals offset is always zero. + */ + public void aggregate(int docID, float score, IntsRef ordinals) throws IOException; + +} Property changes on: lucene/facet/src/java/org/apache/lucene/facet/old/Aggregator.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/facet/src/java/org/apache/lucene/facet/old/ComplementCountingAggregator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/old/ComplementCountingAggregator.java (revision 0) +++ lucene/facet/src/java/org/apache/lucene/facet/old/ComplementCountingAggregator.java (working copy) @@ -0,0 +1,44 @@ +package org.apache.lucene.facet.old; + +import java.io.IOException; + +import org.apache.lucene.util.IntsRef; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * A {@link CountingAggregator} used during complement counting. + * + * @lucene.experimental + */ +public class ComplementCountingAggregator extends CountingAggregator { + + public ComplementCountingAggregator(int[] counterArray) { + super(counterArray); + } + + @Override + public void aggregate(int docID, float score, IntsRef ordinals) throws IOException { + for (int i = 0; i < ordinals.length; i++) { + int ord = ordinals.ints[i]; + assert counterArray[ord] != 0 : "complement aggregation: count is about to become negative for ordinal " + ord; + --counterArray[ord]; + } + } + +} Property changes on: lucene/facet/src/java/org/apache/lucene/facet/old/ComplementCountingAggregator.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/facet/src/java/org/apache/lucene/facet/old/CountingAggregator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/old/CountingAggregator.java (revision 0) +++ lucene/facet/src/java/org/apache/lucene/facet/old/CountingAggregator.java (working copy) @@ -0,0 +1,66 @@ +package org.apache.lucene.facet.old; + +import java.io.IOException; + +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.util.IntsRef; + +/* + * Licensed to the Apache Software 
Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * An {@link Aggregator} which updates a counter array with the size of the + * whole taxonomy, counting the number of times each category appears in the + * given set of documents. + * + * @lucene.experimental + */ +public class CountingAggregator implements Aggregator { + + protected int[] counterArray; + + public CountingAggregator(int[] counterArray) { + this.counterArray = counterArray; + } + + @Override + public void aggregate(int docID, float score, IntsRef ordinals) throws IOException { + for (int i = 0; i < ordinals.length; i++) { + counterArray[ordinals.ints[i]]++; + } + } + + @Override + public boolean equals(Object obj) { + if (obj == null || obj.getClass() != this.getClass()) { + return false; + } + CountingAggregator that = (CountingAggregator) obj; + return that.counterArray == this.counterArray; + } + + @Override + public int hashCode() { + return counterArray == null ? 
0 : counterArray.hashCode(); + } + + @Override + public boolean setNextReader(AtomicReaderContext context) throws IOException { + return true; + } + +} Property changes on: lucene/facet/src/java/org/apache/lucene/facet/old/CountingAggregator.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/facet/src/java/org/apache/lucene/facet/old/MatchingDocsAsScoredDocIDs.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/old/MatchingDocsAsScoredDocIDs.java (revision 0) +++ lucene/facet/src/java/org/apache/lucene/facet/old/MatchingDocsAsScoredDocIDs.java (working copy) @@ -0,0 +1,174 @@ +package org.apache.lucene.facet.old; + +import java.io.IOException; +import java.util.Iterator; +import java.util.List; + +import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs; +import org.apache.lucene.search.DocIdSet; +import org.apache.lucene.search.DocIdSetIterator; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Represents {@link MatchingDocs} as {@link ScoredDocIDs}. 
+ * + * @lucene.experimental + */ +public class MatchingDocsAsScoredDocIDs implements ScoredDocIDs { + + // TODO remove this class once we get rid of ScoredDocIDs + + final List matchingDocs; + final int size; + + public MatchingDocsAsScoredDocIDs(List matchingDocs) { + this.matchingDocs = matchingDocs; + int totalSize = 0; + for (MatchingDocs md : matchingDocs) { + totalSize += md.totalHits; + } + this.size = totalSize; + } + + @Override + public ScoredDocIDsIterator iterator() throws IOException { + return new ScoredDocIDsIterator() { + + final Iterator mdIter = matchingDocs.iterator(); + + int scoresIdx = 0; + int doc = 0; + MatchingDocs current; + int currentLength; + boolean done = false; + + @Override + public boolean next() { + if (done) { + return false; + } + + while (current == null) { + if (!mdIter.hasNext()) { + done = true; + return false; + } + current = mdIter.next(); + currentLength = current.bits.length(); + doc = 0; + scoresIdx = 0; + + if (doc >= currentLength || (doc = current.bits.nextSetBit(doc)) == -1) { + current = null; + } else { + doc = -1; // we're calling nextSetBit later on + } + } + + ++doc; + if (doc >= currentLength || (doc = current.bits.nextSetBit(doc)) == -1) { + current = null; + return next(); + } + + return true; + } + + @Override + public float getScore() { + return current.scores == null ? ScoredDocIDsIterator.DEFAULT_SCORE : current.scores[scoresIdx++]; + } + + @Override + public int getDocID() { + return done ? 
DocIdSetIterator.NO_MORE_DOCS : doc + current.context.docBase; + } + }; + } + + @Override + public DocIdSet getDocIDs() { + return new DocIdSet() { + + final Iterator mdIter = matchingDocs.iterator(); + int doc = 0; + MatchingDocs current; + int currentLength; + boolean done = false; + + @Override + public DocIdSetIterator iterator() throws IOException { + return new DocIdSetIterator() { + + @Override + public int nextDoc() throws IOException { + if (done) { + return DocIdSetIterator.NO_MORE_DOCS; + } + + while (current == null) { + if (!mdIter.hasNext()) { + done = true; + return DocIdSetIterator.NO_MORE_DOCS; + } + current = mdIter.next(); + currentLength = current.bits.length(); + doc = 0; + + if (doc >= currentLength || (doc = current.bits.nextSetBit(doc)) == -1) { + current = null; + } else { + doc = -1; // we're calling nextSetBit later on + } + } + + ++doc; + if (doc >= currentLength || (doc = current.bits.nextSetBit(doc)) == -1) { + current = null; + return nextDoc(); + } + + return doc + current.context.docBase; + } + + @Override + public int docID() { + return doc + current.context.docBase; + } + + @Override + public long cost() { + return size; + } + + @Override + public int advance(int target) throws IOException { + throw new UnsupportedOperationException("not supported"); + } + }; + } + }; + } + + @Override + public int size() { + return size; + } + +} \ No newline at end of file Property changes on: lucene/facet/src/java/org/apache/lucene/facet/old/MatchingDocsAsScoredDocIDs.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/facet/src/java/org/apache/lucene/facet/old/OldFacetsAccumulator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/old/OldFacetsAccumulator.java (revision 0) +++ lucene/facet/src/java/org/apache/lucene/facet/old/OldFacetsAccumulator.java (working 
copy) @@ -0,0 +1,436 @@ +package org.apache.lucene.facet.old; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map.Entry; + +import org.apache.lucene.facet.complements.TotalFacetCounts; +import org.apache.lucene.facet.complements.TotalFacetCountsCache; +import org.apache.lucene.facet.params.FacetIndexingParams; +import org.apache.lucene.facet.params.FacetSearchParams; +import org.apache.lucene.facet.partitions.IntermediateFacetResult; +import org.apache.lucene.facet.partitions.PartitionsFacetResultsHandler; +import org.apache.lucene.facet.sampling.Sampler.OverSampledFacetRequest; +import org.apache.lucene.facet.search.CategoryListIterator; +import org.apache.lucene.facet.search.CountFacetRequest; +import org.apache.lucene.facet.search.FacetArrays; +import org.apache.lucene.facet.search.FacetRequest; +import org.apache.lucene.facet.search.FacetRequest.ResultMode; +import org.apache.lucene.facet.search.FacetResult; +import org.apache.lucene.facet.search.FacetsAccumulator; +import org.apache.lucene.facet.search.FacetsAggregator; +import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs; +import org.apache.lucene.facet.search.SumScoreFacetRequest; +import org.apache.lucene.facet.search.TaxonomyFacetsAccumulator; +import org.apache.lucene.facet.search.TopKFacetResultsHandler; +import org.apache.lucene.facet.search.TopKInEachNodeHandler; +import org.apache.lucene.facet.taxonomy.TaxonomyReader; +import org.apache.lucene.facet.util.PartitionsUtils; +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.util.IntsRef; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * A {@link FacetsAccumulator} which supports partitions, sampling and + * complement counting. + *

+ * NOTE: this accumulator still uses the old API and will be removed + * eventually in favor of dedicated accumulators which support the above + * features over the new {@link FacetsAggregator} API. It provides + * {@link Aggregator} implementations for {@link CountFacetRequest}, + * {@link SumScoreFacetRequest} and {@link OverSampledFacetRequest}. If you need + * to use it in conjunction with other facet requests, you should override + * {@link #createAggregator(FacetRequest, FacetArrays)}. + * + * @lucene.experimental + */ +public class OldFacetsAccumulator extends TaxonomyFacetsAccumulator { + + /** + * Default threshold for using the complements optimization. + * If accumulating facets for a document set larger than this ratio of the index size then + * perform the complement optimization. + * @see #setComplementThreshold(double) for more info on the complements optimization. + */ + public static final double DEFAULT_COMPLEMENT_THRESHOLD = 0.6; + + /** + * Passing this to {@link #setComplementThreshold(double)} will disable using complement optimization. + */ + public static final double DISABLE_COMPLEMENT = Double.POSITIVE_INFINITY; // > 1 actually + + /** + * Passing this to {@link #setComplementThreshold(double)} will force using complement optimization. 
+ */ + public static final double FORCE_COMPLEMENT = 0; // <=0 + + protected int partitionSize; + protected int maxPartitions; + protected boolean isUsingComplements; + + private TotalFacetCounts totalFacetCounts; + + private Object accumulateGuard; + + private double complementThreshold = DEFAULT_COMPLEMENT_THRESHOLD; + + private static FacetArrays createFacetArrays(FacetSearchParams searchParams, TaxonomyReader taxoReader) { + return new FacetArrays(PartitionsUtils.partitionSize(searchParams.indexingParams, taxoReader)); + } + + public OldFacetsAccumulator(FacetSearchParams searchParams, IndexReader indexReader, + TaxonomyReader taxonomyReader) { + this(searchParams, indexReader, taxonomyReader, null); + } + + public OldFacetsAccumulator(FacetSearchParams searchParams, IndexReader indexReader, + TaxonomyReader taxonomyReader, FacetArrays facetArrays) { + super(searchParams, indexReader, taxonomyReader, facetArrays == null ? createFacetArrays(searchParams, taxonomyReader) : facetArrays); + + // can only be computed later when docids size is known + isUsingComplements = false; + partitionSize = PartitionsUtils.partitionSize(searchParams.indexingParams, taxonomyReader); + maxPartitions = (int) Math.ceil(this.taxonomyReader.getSize() / (double) partitionSize); + accumulateGuard = new Object(); + } + + // TODO: this should be removed once we clean the API + public List accumulate(ScoredDocIDs docids) throws IOException { + + // synchronize to prevent calling two accumulate()'s at the same time. + // We decided not to synchronize the method because that might mislead + // users to feel encouraged to call this method simultaneously. 
+ synchronized (accumulateGuard) { + + // only now we can compute this + isUsingComplements = shouldComplement(docids); + + if (isUsingComplements) { + try { + totalFacetCounts = TotalFacetCountsCache.getSingleton().getTotalCounts(indexReader, taxonomyReader, searchParams.indexingParams); + if (totalFacetCounts != null) { + docids = ScoredDocIdsUtils.getComplementSet(docids, indexReader); + } else { + isUsingComplements = false; + } + } catch (UnsupportedOperationException e) { + // TODO (Facet): this exception is thrown from TotalCountsKey if the + // IndexReader used does not support getVersion(). We should re-think + // this: is this tiny detail worth disabling total counts completely + // for such readers? Currently, it's not supported by Parallel and + // MultiReader, which might be problematic for several applications. + // We could, for example, base our "isCurrent" logic on something else + // than the reader's version. Need to think more deeply about it. + isUsingComplements = false; + } catch (IOException e) { + // silently fail if for some reason failed to load/save from/to dir + isUsingComplements = false; + } catch (Exception e) { + // give up: this should not happen! + throw new IOException("PANIC: Got unexpected exception while trying to get/calculate total counts", e); + } + } + + docids = actualDocsToAccumulate(docids); + + HashMap fr2tmpRes = new HashMap(); + + try { + for (int part = 0; part < maxPartitions; part++) { + + // fill arrays from category lists + fillArraysForPartition(docids, facetArrays, part); + + int offset = part * partitionSize; + + // for each partition we go over all requests and handle + // each, where the request maintains the merged result. + // In this implementation merges happen after each partition, + // but other impl could merge only at the end. 
+ final HashSet handledRequests = new HashSet(); + for (FacetRequest fr : searchParams.facetRequests) { + // Handle and merge only facet requests which were not already handled. + if (handledRequests.add(fr)) { + PartitionsFacetResultsHandler frHndlr = createFacetResultsHandler(fr); + IntermediateFacetResult res4fr = frHndlr.fetchPartitionResult(offset); + IntermediateFacetResult oldRes = fr2tmpRes.get(fr); + if (oldRes != null) { + res4fr = frHndlr.mergeResults(oldRes, res4fr); + } + fr2tmpRes.put(fr, res4fr); + } + } + } + } finally { + facetArrays.free(); + } + + // gather results from all requests into a list for returning them + List res = new ArrayList(); + for (FacetRequest fr : searchParams.facetRequests) { + PartitionsFacetResultsHandler frHndlr = createFacetResultsHandler(fr); + IntermediateFacetResult tmpResult = fr2tmpRes.get(fr); + if (tmpResult == null) { + // Add empty FacetResult: + res.add(emptyResult(taxonomyReader.getOrdinal(fr.categoryPath), fr)); + continue; + } + FacetResult facetRes = frHndlr.renderFacetResult(tmpResult); + // final labeling if allowed (because labeling is a costly operation) + frHndlr.labelResult(facetRes); + res.add(facetRes); + } + + return res; + } + } + + /** check if all requests are complementable */ + protected boolean mayComplement() { + for (FacetRequest freq : searchParams.facetRequests) { + if (!(freq instanceof CountFacetRequest)) { + return false; + } + } + return true; + } + + @Override + public PartitionsFacetResultsHandler createFacetResultsHandler(FacetRequest fr) { + if (fr.getResultMode() == ResultMode.PER_NODE_IN_TREE) { + return new TopKInEachNodeHandler(taxonomyReader, fr, facetArrays); + } else { + return new TopKFacetResultsHandler(taxonomyReader, fr, facetArrays); + } + } + + /** + * Set the actual set of documents over which accumulation should take place. + *

+ * Allows to override the set of documents to accumulate for. Invoked just + * before actual accumulating starts. From this point that set of documents + * remains unmodified. Default implementation just returns the input + * unchanged. + * + * @param docids + * candidate documents to accumulate for + * @return actual documents to accumulate for + */ + protected ScoredDocIDs actualDocsToAccumulate(ScoredDocIDs docids) throws IOException { + return docids; + } + + /** Check if it is worth to use complements */ + protected boolean shouldComplement(ScoredDocIDs docids) { + return mayComplement() && (docids.size() > indexReader.numDocs() * getComplementThreshold()) ; + } + + /** + * Iterate over the documents for this partition and fill the facet arrays with the correct + * count/complement count/value. + */ + private final void fillArraysForPartition(ScoredDocIDs docids, FacetArrays facetArrays, int partition) + throws IOException { + + if (isUsingComplements) { + initArraysByTotalCounts(facetArrays, partition, docids.size()); + } else { + facetArrays.free(); // to get a cleared array for this partition + } + + HashMap categoryLists = getCategoryListMap(facetArrays, partition); + + IntsRef ordinals = new IntsRef(32); // a reasonable start capacity for most common apps + for (Entry entry : categoryLists.entrySet()) { + final ScoredDocIDsIterator iterator = docids.iterator(); + final CategoryListIterator categoryListIter = entry.getKey(); + final Aggregator aggregator = entry.getValue(); + Iterator contexts = indexReader.leaves().iterator(); + AtomicReaderContext current = null; + int maxDoc = -1; + while (iterator.next()) { + int docID = iterator.getDocID(); + if (docID >= maxDoc) { + boolean iteratorDone = false; + do { // find the segment which contains this document + if (!contexts.hasNext()) { + throw new RuntimeException("ScoredDocIDs contains documents outside this reader's segments !?"); + } + current = contexts.next(); + maxDoc = current.docBase + 
current.reader().maxDoc(); + if (docID < maxDoc) { // segment has docs, check if it has categories + boolean validSegment = categoryListIter.setNextReader(current); + validSegment &= aggregator.setNextReader(current); + if (!validSegment) { // if categoryList or aggregtor say it's an invalid segment, skip all docs + while (docID < maxDoc && iterator.next()) { + docID = iterator.getDocID(); + } + if (docID < maxDoc) { + iteratorDone = true; + } + } + } + } while (docID >= maxDoc); + if (iteratorDone) { // iterator finished, terminate the loop + break; + } + } + docID -= current.docBase; + categoryListIter.getOrdinals(docID, ordinals); + if (ordinals.length == 0) { + continue; // document does not have category ordinals + } + aggregator.aggregate(docID, iterator.getScore(), ordinals); + } + } + } + + /** Init arrays for partition by total counts, optionally applying a factor */ + private final void initArraysByTotalCounts(FacetArrays facetArrays, int partition, int nAccumulatedDocs) { + int[] intArray = facetArrays.getIntArray(); + totalFacetCounts.fillTotalCountsForPartition(intArray, partition); + double totalCountsFactor = getTotalCountsFactor(); + // fix total counts, but only if the effect of this would be meaningful. + if (totalCountsFactor < 0.99999) { + int delta = nAccumulatedDocs + 1; + for (int i = 0; i < intArray.length; i++) { + intArray[i] *= totalCountsFactor; + // also translate to prevent loss of non-positive values + // due to complement sampling (ie if sampled docs all decremented a certain category). + intArray[i] += delta; + } + } + } + + /** + * Expert: factor by which counts should be multiplied when initializing + * the count arrays from total counts. + * Default implementation for this returns 1, which is a no op. 
+ * @return a factor by which total counts should be multiplied + */ + protected double getTotalCountsFactor() { + return 1; + } + + protected Aggregator createAggregator(FacetRequest fr, FacetArrays facetArrays) { + if (fr instanceof CountFacetRequest) { + // we rely on that, if needed, result is cleared by arrays! + int[] a = facetArrays.getIntArray(); + if (isUsingComplements) { + return new ComplementCountingAggregator(a); + } else { + return new CountingAggregator(a); + } + } else if (fr instanceof SumScoreFacetRequest) { + if (isUsingComplements) { + throw new IllegalArgumentException("complements are not supported by this SumScoreFacetRequest"); + } else { + return new ScoringAggregator(facetArrays.getFloatArray()); + } + } else if (fr instanceof OverSampledFacetRequest) { + return createAggregator(((OverSampledFacetRequest) fr).orig, facetArrays); + } else { + throw new IllegalArgumentException("unknown Aggregator implementation for request " + fr.getClass()); + } + } + + /** + * Create an {@link Aggregator} and a {@link CategoryListIterator} for each + * and every {@link FacetRequest}. Generating a map, matching each + * categoryListIterator to its matching aggregator. + *

+ * If two CategoryListIterators are served by the same aggregator, a single + * aggregator is returned for both. + * + * NOTE: If a given category list iterator is needed with two different + * aggregators (e.g counting and association) - an exception is thrown as this + * functionality is not supported at this time. + */ + protected HashMap getCategoryListMap(FacetArrays facetArrays, + int partition) throws IOException { + + HashMap categoryLists = new HashMap(); + + FacetIndexingParams indexingParams = searchParams.indexingParams; + for (FacetRequest facetRequest : searchParams.facetRequests) { + Aggregator categoryAggregator = createAggregator(facetRequest, facetArrays); // nocommit remove! facetRequest.createAggregator(isUsingComplements, facetArrays, taxonomyReader); + + CategoryListIterator cli = indexingParams.getCategoryListParams(facetRequest.categoryPath).createCategoryListIterator(partition); + + // get the aggregator + Aggregator old = categoryLists.put(cli, categoryAggregator); + + if (old != null && !old.equals(categoryAggregator)) { + throw new RuntimeException("Overriding existing category list with different aggregator"); + } + // if the aggregator is the same we're covered + } + + return categoryLists; + } + + @Override + public List accumulate(List matchingDocs) throws IOException { + return accumulate(new MatchingDocsAsScoredDocIDs(matchingDocs)); + } + + /** + * Returns the complement threshold. + * @see #setComplementThreshold(double) + */ + public double getComplementThreshold() { + return complementThreshold; + } + + /** + * Set the complement threshold. + * This threshold will dictate whether the complements optimization is applied. + * The optimization is to count for less documents. It is useful when the same + * FacetSearchParams are used for varying sets of documents. The first time + * complements is used the "total counts" are computed - counting for all the + * documents in the collection. 
Then, only the complementing set of documents + * is considered, and used to decrement from the overall counts, thereby + * walking through less documents, which is faster. + *

+ * For the default settings see {@link #DEFAULT_COMPLEMENT_THRESHOLD}. + *

+ * To forcing complements in all cases pass {@link #FORCE_COMPLEMENT}. + * This is mostly useful for testing purposes, as forcing complements when only + * tiny fraction of available documents match the query does not make sense and + * would incur performance degradations. + *

+ * To disable complements pass {@link #DISABLE_COMPLEMENT}. + * @param complementThreshold the complement threshold to set + * @see #getComplementThreshold() + */ + public void setComplementThreshold(double complementThreshold) { + this.complementThreshold = complementThreshold; + } + + /** Returns true if complements are enabled. */ + public boolean isUsingComplements() { + return isUsingComplements; + } + +} Property changes on: lucene/facet/src/java/org/apache/lucene/facet/old/OldFacetsAccumulator.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/facet/src/java/org/apache/lucene/facet/old/ScoredDocIDs.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/old/ScoredDocIDs.java (revision 0) +++ lucene/facet/src/java/org/apache/lucene/facet/old/ScoredDocIDs.java (working copy) @@ -0,0 +1,42 @@ +package org.apache.lucene.facet.old; + +import java.io.IOException; + +import org.apache.lucene.search.DocIdSet; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Document IDs with scores for each, driving facets accumulation. 
Document + * scores are optionally used in the process of facets scoring. + * + * @see OldFacetsAccumulator#accumulate(ScoredDocIDs) + * @lucene.experimental + */ +public interface ScoredDocIDs { + + /** Returns an iterator over the document IDs and their scores. */ + public ScoredDocIDsIterator iterator() throws IOException; + + /** Returns the set of doc IDs. */ + public DocIdSet getDocIDs(); + + /** Returns the number of scored documents. */ + public int size(); + +} Property changes on: lucene/facet/src/java/org/apache/lucene/facet/old/ScoredDocIDs.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/facet/src/java/org/apache/lucene/facet/old/ScoredDocIDsIterator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/old/ScoredDocIDsIterator.java (revision 0) +++ lucene/facet/src/java/org/apache/lucene/facet/old/ScoredDocIDsIterator.java (working copy) @@ -0,0 +1,43 @@ +package org.apache.lucene.facet.old; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Iterator over document IDs and their scores. 
Each {@link #next()} retrieves + * the next docID and its score which can be later be retrieved by + * {@link #getDocID()} and {@link #getScore()}. NOTE: you must call + * {@link #next()} before {@link #getDocID()} and/or {@link #getScore()}, or + * otherwise the returned values are unexpected. + * + * @lucene.experimental + */ +public interface ScoredDocIDsIterator { + + /** Default score used in case scoring is disabled. */ + public static final float DEFAULT_SCORE = 1.0f; + + /** Iterate to the next document/score pair. Returns true iff there is such a pair. */ + public abstract boolean next(); + + /** Returns the ID of the current document. */ + public abstract int getDocID(); + + /** Returns the score of the current document. */ + public abstract float getScore(); + +} \ No newline at end of file Property changes on: lucene/facet/src/java/org/apache/lucene/facet/old/ScoredDocIDsIterator.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/facet/src/java/org/apache/lucene/facet/old/ScoredDocIdsUtils.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/old/ScoredDocIdsUtils.java (revision 0) +++ lucene/facet/src/java/org/apache/lucene/facet/old/ScoredDocIdsUtils.java (working copy) @@ -0,0 +1,446 @@ +package org.apache.lucene.facet.old; + +import java.io.IOException; +import java.util.Arrays; + +import org.apache.lucene.facet.old.ScoredDocIDs; +import org.apache.lucene.facet.old.ScoredDocIDsIterator; +import org.apache.lucene.index.AtomicReader; +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.MultiFields; +import org.apache.lucene.search.DocIdSet; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.FixedBitSet; +import 
org.apache.lucene.util.OpenBitSetDISI; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Utility methods for Scored Doc IDs. + * + * @lucene.experimental + */ +public class ScoredDocIdsUtils { + + /** + * Create a complement of the input set. The returned {@link ScoredDocIDs} + * does not contain any scores, which makes sense given that the complementing + * documents were not scored. + * + * Note: the complement set does NOT contain doc ids which are noted as deleted by the given reader + * + * @param docids to be complemented. + * @param reader holding the number of documents & information about deletions. + */ + public final static ScoredDocIDs getComplementSet(final ScoredDocIDs docids, final IndexReader reader) + throws IOException { + final int maxDoc = reader.maxDoc(); + + DocIdSet docIdSet = docids.getDocIDs(); + final FixedBitSet complement; + if (docIdSet instanceof FixedBitSet) { + // That is the most common case, if ScoredDocIdsCollector was used. 
+ complement = ((FixedBitSet) docIdSet).clone(); + } else { + complement = new FixedBitSet(maxDoc); + DocIdSetIterator iter = docIdSet.iterator(); + int doc; + while ((doc = iter.nextDoc()) < maxDoc) { + complement.set(doc); + } + } + complement.flip(0, maxDoc); + clearDeleted(reader, complement); + + return createScoredDocIds(complement, maxDoc); + } + + /** Clear all deleted documents from a given open-bit-set according to a given reader */ + private static void clearDeleted(final IndexReader reader, final FixedBitSet set) throws IOException { + // TODO use BitsFilteredDocIdSet? + + // If there are no deleted docs + if (!reader.hasDeletions()) { + return; // return immediately + } + + DocIdSetIterator it = set.iterator(); + int doc = it.nextDoc(); + for (AtomicReaderContext context : reader.leaves()) { + AtomicReader r = context.reader(); + final int maxDoc = r.maxDoc() + context.docBase; + if (doc >= maxDoc) { // skip this segment + continue; + } + if (!r.hasDeletions()) { // skip all docs that belong to this reader as it has no deletions + while ((doc = it.nextDoc()) < maxDoc) {} + continue; + } + Bits liveDocs = r.getLiveDocs(); + do { + if (!liveDocs.get(doc - context.docBase)) { + set.clear(doc); + } + } while ((doc = it.nextDoc()) < maxDoc); + } + } + + /** + * Create a subset of an existing ScoredDocIDs object. + * + * @param allDocIds orginal set + * @param sampleSet Doc Ids of the subset. 
+ */ + public static final ScoredDocIDs createScoredDocIDsSubset(final ScoredDocIDs allDocIds, + final int[] sampleSet) throws IOException { + + // sort so that we can scan docs in order + final int[] docids = sampleSet; + Arrays.sort(docids); + final float[] scores = new float[docids.length]; + // fetch scores and compute size + ScoredDocIDsIterator it = allDocIds.iterator(); + int n = 0; + while (it.next() && n < docids.length) { + int doc = it.getDocID(); + if (doc == docids[n]) { + scores[n] = it.getScore(); + ++n; + } + } + final int size = n; + + return new ScoredDocIDs() { + + @Override + public DocIdSet getDocIDs() { + return new DocIdSet() { + + @Override + public boolean isCacheable() { return true; } + + @Override + public DocIdSetIterator iterator() { + return new DocIdSetIterator() { + + private int next = -1; + + @Override + public int advance(int target) { + while (next < size && docids[next++] < target) { + } + return next == size ? NO_MORE_DOCS : docids[next]; + } + + @Override + public int docID() { + return docids[next]; + } + + @Override + public int nextDoc() { + if (++next >= size) { + return NO_MORE_DOCS; + } + return docids[next]; + } + + @Override + public long cost() { + return size; + } + }; + } + }; + } + + @Override + public ScoredDocIDsIterator iterator() { + return new ScoredDocIDsIterator() { + + int next = -1; + + @Override + public boolean next() { return ++next < size; } + + @Override + public float getScore() { return scores[next]; } + + @Override + public int getDocID() { return docids[next]; } + }; + } + + @Override + public int size() { return size; } + + }; + } + + /** + * Creates a {@link ScoredDocIDs} which returns document IDs all non-deleted doc ids + * according to the given reader. + * The returned set contains the range of [0 .. 
reader.maxDoc ) doc ids + */ + public static final ScoredDocIDs createAllDocsScoredDocIDs (final IndexReader reader) { + if (reader.hasDeletions()) { + return new AllLiveDocsScoredDocIDs(reader); + } + return new AllDocsScoredDocIDs(reader); + } + + /** + * Create a ScoredDocIDs out of a given docIdSet and the total number of documents in an index + */ + public static final ScoredDocIDs createScoredDocIds(final DocIdSet docIdSet, final int maxDoc) { + return new ScoredDocIDs() { + private int size = -1; + @Override + public DocIdSet getDocIDs() { return docIdSet; } + + @Override + public ScoredDocIDsIterator iterator() throws IOException { + final DocIdSetIterator docIterator = docIdSet.iterator(); + return new ScoredDocIDsIterator() { + @Override + public boolean next() { + try { + return docIterator.nextDoc() != DocIdSetIterator.NO_MORE_DOCS; + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + @Override + public float getScore() { return DEFAULT_SCORE; } + + @Override + public int getDocID() { return docIterator.docID(); } + }; + } + + @Override + public int size() { + // lazy size computation + if (size < 0) { + OpenBitSetDISI openBitSetDISI; + try { + openBitSetDISI = new OpenBitSetDISI(docIdSet.iterator(), maxDoc); + } catch (IOException e) { + throw new RuntimeException(e); + } + size = (int) openBitSetDISI.cardinality(); + } + return size; + } + }; + } + + /** + * All docs ScoredDocsIDs - this one is simply an 'all 1' bitset. 
Used when + * there are no deletions in the index and we wish to go through each and + * every document + */ + private static class AllDocsScoredDocIDs implements ScoredDocIDs { + final int maxDoc; + + public AllDocsScoredDocIDs(IndexReader reader) { + this.maxDoc = reader.maxDoc(); + } + + @Override + public int size() { + return maxDoc; + } + + @Override + public DocIdSet getDocIDs() { + return new DocIdSet() { + + @Override + public boolean isCacheable() { + return true; + } + + @Override + public DocIdSetIterator iterator() { + return new DocIdSetIterator() { + private int next = -1; + + @Override + public int advance(int target) { + if (target <= next) { + target = next + 1; + } + return next = target >= maxDoc ? NO_MORE_DOCS : target; + } + + @Override + public int docID() { + return next; + } + + @Override + public int nextDoc() { + return ++next < maxDoc ? next : NO_MORE_DOCS; + } + + @Override + public long cost() { + return maxDoc; + } + }; + } + }; + } + + @Override + public ScoredDocIDsIterator iterator() { + try { + final DocIdSetIterator iter = getDocIDs().iterator(); + return new ScoredDocIDsIterator() { + @Override + public boolean next() { + try { + return iter.nextDoc() != DocIdSetIterator.NO_MORE_DOCS; + } catch (IOException e) { + // cannot happen + return false; + } + } + + @Override + public float getScore() { + return DEFAULT_SCORE; + } + + @Override + public int getDocID() { + return iter.docID(); + } + }; + } catch (IOException e) { + // cannot happen + throw new RuntimeException(e); + } + } + } + + /** + * An All-docs bitset which has '0' for deleted documents and '1' for the + * rest. Useful for iterating over all 'live' documents in a given index. + *

+ * NOTE: this class would work for indexes with no deletions at all, + * although it is recommended to use {@link AllDocsScoredDocIDs} to ease + * the performance cost of validating isDeleted() on each and every docId + */ + private static final class AllLiveDocsScoredDocIDs implements ScoredDocIDs { + final int maxDoc; + final IndexReader reader; + + AllLiveDocsScoredDocIDs(IndexReader reader) { + this.maxDoc = reader.maxDoc(); + this.reader = reader; + } + + @Override + public int size() { + return reader.numDocs(); + } + + @Override + public DocIdSet getDocIDs() { + return new DocIdSet() { + + @Override + public boolean isCacheable() { + return true; + } + + @Override + public DocIdSetIterator iterator() { + return new DocIdSetIterator() { + final Bits liveDocs = MultiFields.getLiveDocs(reader); + private int next = -1; + + @Override + public int advance(int target) { + if (target > next) { + next = target - 1; + } + return nextDoc(); + } + + @Override + public int docID() { + return next; + } + + @Override + public int nextDoc() { + do { + ++next; + } while (next < maxDoc && liveDocs != null && !liveDocs.get(next)); + + return next < maxDoc ? 
next : NO_MORE_DOCS; + } + + @Override + public long cost() { + return maxDoc; + } + }; + } + }; + } + + @Override + public ScoredDocIDsIterator iterator() { + try { + final DocIdSetIterator iter = getDocIDs().iterator(); + return new ScoredDocIDsIterator() { + @Override + public boolean next() { + try { + return iter.nextDoc() != DocIdSetIterator.NO_MORE_DOCS; + } catch (IOException e) { + // cannot happen + return false; + } + } + + @Override + public float getScore() { + return DEFAULT_SCORE; + } + + @Override + public int getDocID() { + return iter.docID(); + } + }; + } catch (IOException e) { + // cannot happen + throw new RuntimeException(e); + } + } + } + +} \ No newline at end of file Property changes on: lucene/facet/src/java/org/apache/lucene/facet/old/ScoredDocIdsUtils.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/facet/src/java/org/apache/lucene/facet/old/ScoringAggregator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/old/ScoringAggregator.java (revision 0) +++ lucene/facet/src/java/org/apache/lucene/facet/old/ScoringAggregator.java (working copy) @@ -0,0 +1,67 @@ +package org.apache.lucene.facet.old; + +import java.io.IOException; + +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.util.IntsRef; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * An {@link Aggregator} which updates the weight of a category according to the + * scores of the documents it was found in. + * + * @lucene.experimental + */ +public class ScoringAggregator implements Aggregator { + + private final float[] scoreArray; + private final int hashCode; + + public ScoringAggregator(float[] counterArray) { + this.scoreArray = counterArray; + this.hashCode = scoreArray == null ? 0 : scoreArray.hashCode(); + } + + @Override + public void aggregate(int docID, float score, IntsRef ordinals) throws IOException { + for (int i = 0; i < ordinals.length; i++) { + scoreArray[ordinals.ints[i]] += score; + } + } + + @Override + public boolean equals(Object obj) { + if (obj == null || obj.getClass() != this.getClass()) { + return false; + } + ScoringAggregator that = (ScoringAggregator) obj; + return that.scoreArray == this.scoreArray; + } + + @Override + public int hashCode() { + return hashCode; + } + + @Override + public boolean setNextReader(AtomicReaderContext context) throws IOException { + return true; + } + +} Property changes on: lucene/facet/src/java/org/apache/lucene/facet/old/ScoringAggregator.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/facet/src/java/org/apache/lucene/facet/partitions/PartitionsFacetResultsHandler.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/partitions/PartitionsFacetResultsHandler.java (revision 
1508077) +++ lucene/facet/src/java/org/apache/lucene/facet/partitions/PartitionsFacetResultsHandler.java (working copy) @@ -2,13 +2,13 @@ import java.io.IOException; +import org.apache.lucene.facet.old.OldFacetsAccumulator; +import org.apache.lucene.facet.old.ScoredDocIDs; import org.apache.lucene.facet.search.FacetArrays; import org.apache.lucene.facet.search.FacetRequest; import org.apache.lucene.facet.search.FacetResult; import org.apache.lucene.facet.search.FacetResultNode; import org.apache.lucene.facet.search.FacetResultsHandler; -import org.apache.lucene.facet.search.ScoredDocIDs; -import org.apache.lucene.facet.search.StandardFacetsAccumulator; import org.apache.lucene.facet.taxonomy.TaxonomyReader; /* @@ -103,7 +103,7 @@ /** * Label results according to settings in {@link FacetRequest}, such as * {@link FacetRequest#getNumLabel()}. Usually invoked by - * {@link StandardFacetsAccumulator#accumulate(ScoredDocIDs)} + * {@link OldFacetsAccumulator#accumulate(ScoredDocIDs)} * * @param facetResult * facet result to be labeled. 
Index: lucene/facet/src/java/org/apache/lucene/facet/range/RangeFacetRequest.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/range/RangeFacetRequest.java (revision 1508077) +++ lucene/facet/src/java/org/apache/lucene/facet/range/RangeFacetRequest.java (working copy) @@ -20,12 +20,10 @@ import java.util.List; import org.apache.lucene.facet.params.FacetIndexingParams; -import org.apache.lucene.facet.search.Aggregator; import org.apache.lucene.facet.search.FacetArrays; import org.apache.lucene.facet.search.FacetRequest; import org.apache.lucene.facet.search.FacetsAggregator; import org.apache.lucene.facet.taxonomy.CategoryPath; -import org.apache.lucene.facet.taxonomy.TaxonomyReader; /** * Facet request for dynamic ranges based on a @@ -50,11 +48,6 @@ } @Override - public Aggregator createAggregator(boolean useComplements, FacetArrays arrays, TaxonomyReader taxonomy) { - throw new UnsupportedOperationException(); - } - - @Override public FacetsAggregator createFacetsAggregator(FacetIndexingParams fip) { return null; } Index: lucene/facet/src/java/org/apache/lucene/facet/sampling/RandomSampler.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/sampling/RandomSampler.java (revision 1508077) +++ lucene/facet/src/java/org/apache/lucene/facet/sampling/RandomSampler.java (working copy) @@ -3,9 +3,9 @@ import java.io.IOException; import java.util.Random; -import org.apache.lucene.facet.search.ScoredDocIDs; -import org.apache.lucene.facet.search.ScoredDocIDsIterator; -import org.apache.lucene.facet.util.ScoredDocIdsUtils; +import org.apache.lucene.facet.old.ScoredDocIDs; +import org.apache.lucene.facet.old.ScoredDocIDsIterator; +import org.apache.lucene.facet.old.ScoredDocIdsUtils; /* * Licensed to the Apache Software Foundation (ASF) under one or more Index: 
lucene/facet/src/java/org/apache/lucene/facet/sampling/RepeatableSampler.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/sampling/RepeatableSampler.java (revision 1508077) +++ lucene/facet/src/java/org/apache/lucene/facet/sampling/RepeatableSampler.java (working copy) @@ -5,12 +5,11 @@ import java.util.logging.Level; import java.util.logging.Logger; +import org.apache.lucene.facet.old.ScoredDocIDs; +import org.apache.lucene.facet.old.ScoredDocIDsIterator; +import org.apache.lucene.facet.old.ScoredDocIdsUtils; import org.apache.lucene.util.PriorityQueue; -import org.apache.lucene.facet.search.ScoredDocIDs; -import org.apache.lucene.facet.search.ScoredDocIDsIterator; -import org.apache.lucene.facet.util.ScoredDocIdsUtils; - /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with Index: lucene/facet/src/java/org/apache/lucene/facet/sampling/SampleFixer.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/sampling/SampleFixer.java (revision 1508077) +++ lucene/facet/src/java/org/apache/lucene/facet/sampling/SampleFixer.java (working copy) @@ -2,9 +2,9 @@ import java.io.IOException; +import org.apache.lucene.facet.old.ScoredDocIDs; import org.apache.lucene.facet.search.FacetResult; import org.apache.lucene.facet.search.FacetResultNode; -import org.apache.lucene.facet.search.ScoredDocIDs; /* * Licensed to the Apache Software Foundation (ASF) under one or more Index: lucene/facet/src/java/org/apache/lucene/facet/sampling/Sampler.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/sampling/Sampler.java (revision 1508077) +++ lucene/facet/src/java/org/apache/lucene/facet/sampling/Sampler.java (working copy) @@ -4,16 +4,14 @@ import java.util.ArrayList; import java.util.List; 
+import org.apache.lucene.facet.old.ScoredDocIDs; import org.apache.lucene.facet.params.FacetIndexingParams; import org.apache.lucene.facet.params.FacetSearchParams; -import org.apache.lucene.facet.search.Aggregator; import org.apache.lucene.facet.search.FacetArrays; import org.apache.lucene.facet.search.FacetRequest; import org.apache.lucene.facet.search.FacetResult; import org.apache.lucene.facet.search.FacetResultNode; import org.apache.lucene.facet.search.FacetsAggregator; -import org.apache.lucene.facet.search.ScoredDocIDs; -import org.apache.lucene.facet.taxonomy.TaxonomyReader; /* * Licensed to the Apache Software Foundation (ASF) under one or more @@ -198,15 +196,9 @@ return res; } - /** - * Wrapping a facet request for over sampling. - * Implementation detail: even if the original request is a count request, no - * statistics will be computed for it as the wrapping is not a count request. - * This is ok, as the sampling accumulator is later computing the statistics - * over the original requests. - */ - private static class OverSampledFacetRequest extends FacetRequest { - final FacetRequest orig; + /** Wrapping a facet request for over sampling. 
*/ + public static class OverSampledFacetRequest extends FacetRequest { + public final FacetRequest orig; public OverSampledFacetRequest(FacetRequest orig, int num) { super(orig.categoryPath, num); this.orig = orig; @@ -222,12 +214,6 @@ } @Override - public Aggregator createAggregator(boolean useComplements, FacetArrays arrays, TaxonomyReader taxonomy) - throws IOException { - return orig.createAggregator(useComplements, arrays, taxonomy); - } - - @Override public FacetArraysSource getFacetArraysSource() { return orig.getFacetArraysSource(); } Index: lucene/facet/src/java/org/apache/lucene/facet/sampling/SamplingAccumulator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/sampling/SamplingAccumulator.java (revision 1508077) +++ lucene/facet/src/java/org/apache/lucene/facet/sampling/SamplingAccumulator.java (working copy) @@ -4,6 +4,8 @@ import java.util.ArrayList; import java.util.List; +import org.apache.lucene.facet.old.OldFacetsAccumulator; +import org.apache.lucene.facet.old.ScoredDocIDs; import org.apache.lucene.facet.params.FacetSearchParams; import org.apache.lucene.facet.partitions.PartitionsFacetResultsHandler; import org.apache.lucene.facet.sampling.Sampler.SampleResult; @@ -10,8 +12,6 @@ import org.apache.lucene.facet.search.FacetArrays; import org.apache.lucene.facet.search.FacetResult; import org.apache.lucene.facet.search.FacetsAccumulator; -import org.apache.lucene.facet.search.ScoredDocIDs; -import org.apache.lucene.facet.search.StandardFacetsAccumulator; import org.apache.lucene.facet.taxonomy.TaxonomyReader; import org.apache.lucene.index.IndexReader; @@ -38,10 +38,10 @@ * Note two major differences between this class and {@link SamplingWrapper}: *

    *
  1. Latter can wrap any other {@link FacetsAccumulator} while this class - * directly extends {@link StandardFacetsAccumulator}.
  2. + * directly extends {@link OldFacetsAccumulator}. *
  3. This class can effectively apply sampling on the complement set of * matching document, thereby working efficiently with the complement - * optimization - see {@link StandardFacetsAccumulator#getComplementThreshold()} + * optimization - see {@link OldFacetsAccumulator#getComplementThreshold()} * .
  4. *
*

@@ -52,7 +52,7 @@ * @see Sampler * @lucene.experimental */ -public class SamplingAccumulator extends StandardFacetsAccumulator { +public class SamplingAccumulator extends OldFacetsAccumulator { private double samplingRatio = -1d; private final Sampler sampler; Index: lucene/facet/src/java/org/apache/lucene/facet/sampling/SamplingParams.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/sampling/SamplingParams.java (revision 1508077) +++ lucene/facet/src/java/org/apache/lucene/facet/sampling/SamplingParams.java (working copy) @@ -32,19 +32,19 @@ /** * Default ratio between size of sample to original size of document set. - * @see Sampler#getSampleSet(org.apache.lucene.facet.search.ScoredDocIDs) + * @see Sampler#getSampleSet(org.apache.lucene.facet.old.ScoredDocIDs) */ public static final double DEFAULT_SAMPLE_RATIO = 0.01; /** * Default maximum size of sample. - * @see Sampler#getSampleSet(org.apache.lucene.facet.search.ScoredDocIDs) + * @see Sampler#getSampleSet(org.apache.lucene.facet.old.ScoredDocIDs) */ public static final int DEFAULT_MAX_SAMPLE_SIZE = 10000; /** * Default minimum size of sample. - * @see Sampler#getSampleSet(org.apache.lucene.facet.search.ScoredDocIDs) + * @see Sampler#getSampleSet(org.apache.lucene.facet.old.ScoredDocIDs) */ public static final int DEFAULT_MIN_SAMPLE_SIZE = 100; @@ -65,7 +65,7 @@ /** * Return the maxSampleSize. * In no case should the resulting sample size exceed this value. - * @see Sampler#getSampleSet(org.apache.lucene.facet.search.ScoredDocIDs) + * @see Sampler#getSampleSet(org.apache.lucene.facet.old.ScoredDocIDs) */ public final int getMaxSampleSize() { return maxSampleSize; @@ -74,7 +74,7 @@ /** * Return the minSampleSize. * In no case should the resulting sample size be smaller than this value. 
- * @see Sampler#getSampleSet(org.apache.lucene.facet.search.ScoredDocIDs) + * @see Sampler#getSampleSet(org.apache.lucene.facet.old.ScoredDocIDs) */ public final int getMinSampleSize() { return minSampleSize; @@ -82,7 +82,7 @@ /** * @return the sampleRatio - * @see Sampler#getSampleSet(org.apache.lucene.facet.search.ScoredDocIDs) + * @see Sampler#getSampleSet(org.apache.lucene.facet.old.ScoredDocIDs) */ public final double getSampleRatio() { return sampleRatio; Index: lucene/facet/src/java/org/apache/lucene/facet/sampling/SamplingWrapper.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/sampling/SamplingWrapper.java (revision 1508077) +++ lucene/facet/src/java/org/apache/lucene/facet/sampling/SamplingWrapper.java (working copy) @@ -4,12 +4,12 @@ import java.util.ArrayList; import java.util.List; +import org.apache.lucene.facet.old.OldFacetsAccumulator; +import org.apache.lucene.facet.old.ScoredDocIDs; import org.apache.lucene.facet.params.FacetSearchParams; import org.apache.lucene.facet.partitions.PartitionsFacetResultsHandler; import org.apache.lucene.facet.sampling.Sampler.SampleResult; import org.apache.lucene.facet.search.FacetResult; -import org.apache.lucene.facet.search.ScoredDocIDs; -import org.apache.lucene.facet.search.StandardFacetsAccumulator; import org.apache.lucene.facet.taxonomy.TaxonomyReader; /* @@ -38,12 +38,12 @@ * * @lucene.experimental */ -public class SamplingWrapper extends StandardFacetsAccumulator { +public class SamplingWrapper extends OldFacetsAccumulator { - private StandardFacetsAccumulator delegee; + private OldFacetsAccumulator delegee; private Sampler sampler; - public SamplingWrapper(StandardFacetsAccumulator delegee, Sampler sampler) { + public SamplingWrapper(OldFacetsAccumulator delegee, Sampler sampler) { super(delegee.searchParams, delegee.indexReader, delegee.taxonomyReader); this.delegee = delegee; this.sampler = sampler; Index: 
lucene/facet/src/java/org/apache/lucene/facet/sampling/TakmiSampleFixer.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/sampling/TakmiSampleFixer.java (revision 1508077) +++ lucene/facet/src/java/org/apache/lucene/facet/sampling/TakmiSampleFixer.java (working copy) @@ -2,11 +2,11 @@ import java.io.IOException; +import org.apache.lucene.facet.old.ScoredDocIDs; +import org.apache.lucene.facet.old.ScoredDocIDsIterator; import org.apache.lucene.facet.params.FacetSearchParams; import org.apache.lucene.facet.search.DrillDownQuery; import org.apache.lucene.facet.search.FacetResultNode; -import org.apache.lucene.facet.search.ScoredDocIDs; -import org.apache.lucene.facet.search.ScoredDocIDsIterator; import org.apache.lucene.facet.taxonomy.CategoryPath; import org.apache.lucene.facet.taxonomy.TaxonomyReader; import org.apache.lucene.index.DocsEnum; Index: lucene/facet/src/java/org/apache/lucene/facet/search/AdaptiveFacetsAccumulator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/AdaptiveFacetsAccumulator.java (revision 1508077) +++ lucene/facet/src/java/org/apache/lucene/facet/search/AdaptiveFacetsAccumulator.java (working copy) @@ -1,113 +0,0 @@ -package org.apache.lucene.facet.search; - -import java.io.IOException; -import java.util.List; - -import org.apache.lucene.facet.params.FacetSearchParams; -import org.apache.lucene.facet.sampling.RandomSampler; -import org.apache.lucene.facet.sampling.Sampler; -import org.apache.lucene.facet.sampling.SamplingAccumulator; -import org.apache.lucene.facet.taxonomy.TaxonomyReader; -import org.apache.lucene.index.IndexReader; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * {@link FacetsAccumulator} whose behavior regarding complements, sampling, - * etc. is not set up front but rather is determined at accumulation time - * according to the statistics of the accumulated set of documents and the - * index. - *

- * Note: Sampling accumulation (Accumulation over a sampled-set of the results), - * does not guarantee accurate values for - * {@link FacetResult#getNumValidDescendants()}. - * - * @lucene.experimental - */ -public final class AdaptiveFacetsAccumulator extends StandardFacetsAccumulator { - - private Sampler sampler = new RandomSampler(); - - /** - * Create an {@link AdaptiveFacetsAccumulator} - * @see StandardFacetsAccumulator#StandardFacetsAccumulator(FacetSearchParams, IndexReader, TaxonomyReader) - */ - public AdaptiveFacetsAccumulator(FacetSearchParams searchParams, IndexReader indexReader, - TaxonomyReader taxonomyReader) { - super(searchParams, indexReader, taxonomyReader); - } - - /** - * Create an {@link AdaptiveFacetsAccumulator} - * - * @see StandardFacetsAccumulator#StandardFacetsAccumulator(FacetSearchParams, - * IndexReader, TaxonomyReader, FacetArrays) - */ - public AdaptiveFacetsAccumulator(FacetSearchParams searchParams, IndexReader indexReader, - TaxonomyReader taxonomyReader, FacetArrays facetArrays) { - super(searchParams, indexReader, taxonomyReader, facetArrays); - } - - /** - * Set the sampler. - * @param sampler sampler to set - */ - public void setSampler(Sampler sampler) { - this.sampler = sampler; - } - - @Override - public List accumulate(ScoredDocIDs docids) throws IOException { - StandardFacetsAccumulator delegee = appropriateFacetCountingAccumulator(docids); - - if (delegee == this) { - return super.accumulate(docids); - } - - return delegee.accumulate(docids); - } - - /** - * Compute the appropriate facet accumulator to use. - * If no special/clever adaptation is possible/needed return this (self). - */ - private StandardFacetsAccumulator appropriateFacetCountingAccumulator(ScoredDocIDs docids) { - // Verify that searchPareams permit sampling/complement/etc... 
otherwise do default - if (!mayComplement()) { - return this; - } - - // Now we're sure we can use the sampling methods as we're in a counting only mode - - // Verify that sampling is enabled and required ... otherwise do default - if (sampler == null || !sampler.shouldSample(docids)) { - return this; - } - - SamplingAccumulator samplingAccumulator = new SamplingAccumulator(sampler, searchParams, indexReader, taxonomyReader); - samplingAccumulator.setComplementThreshold(getComplementThreshold()); - return samplingAccumulator; - } - - /** - * @return the sampler in effect - */ - public final Sampler getSampler() { - return sampler; - } -} \ No newline at end of file Index: lucene/facet/src/java/org/apache/lucene/facet/search/Aggregator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/Aggregator.java (revision 1508077) +++ lucene/facet/src/java/org/apache/lucene/facet/search/Aggregator.java (working copy) @@ -1,48 +0,0 @@ -package org.apache.lucene.facet.search; - -import java.io.IOException; - -import org.apache.lucene.index.AtomicReaderContext; -import org.apache.lucene.util.IntsRef; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/** - * Aggregates the categories of documents given to - * {@link #aggregate(int, float, IntsRef)}. Note that the document IDs are local - * to the reader given to {@link #setNextReader(AtomicReaderContext)}. - * - * @lucene.experimental - */ -public interface Aggregator { - - /** - * Sets the {@link AtomicReaderContext} for which - * {@link #aggregate(int, float, IntsRef)} calls will be made. If this method - * returns false, {@link #aggregate(int, float, IntsRef)} should not be called - * for this reader. - */ - public boolean setNextReader(AtomicReaderContext context) throws IOException; - - /** - * Aggregate the ordinals of the given document ID (and its score). The given - * ordinals offset is always zero. - */ - public void aggregate(int docID, float score, IntsRef ordinals) throws IOException; - -} Index: lucene/facet/src/java/org/apache/lucene/facet/search/CountFacetRequest.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/CountFacetRequest.java (revision 1508077) +++ lucene/facet/src/java/org/apache/lucene/facet/search/CountFacetRequest.java (working copy) @@ -1,9 +1,7 @@ package org.apache.lucene.facet.search; -import org.apache.lucene.facet.complements.ComplementCountingAggregator; import org.apache.lucene.facet.params.FacetIndexingParams; import org.apache.lucene.facet.taxonomy.CategoryPath; -import org.apache.lucene.facet.taxonomy.TaxonomyReader; /* * Licensed to the Apache Software Foundation (ASF) under one or more @@ -33,18 +31,7 @@ super(path, num); } - // TODO nuke Aggregator and move this logic to StandardFacetsAccumulator -- it should only be used for counting @Override - public Aggregator createAggregator(boolean useComplements, FacetArrays arrays, TaxonomyReader taxonomy) { - // we rely on that, if needed, result is cleared by arrays! 
- int[] a = arrays.getIntArray(); - if (useComplements) { - return new ComplementCountingAggregator(a); - } - return new CountingAggregator(a); - } - - @Override public FacetsAggregator createFacetsAggregator(FacetIndexingParams fip) { return CountingFacetsAggregator.create(fip.getCategoryListParams(categoryPath)); } Index: lucene/facet/src/java/org/apache/lucene/facet/search/CountingAggregator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/CountingAggregator.java (revision 1508077) +++ lucene/facet/src/java/org/apache/lucene/facet/search/CountingAggregator.java (working copy) @@ -1,66 +0,0 @@ -package org.apache.lucene.facet.search; - -import java.io.IOException; - -import org.apache.lucene.index.AtomicReaderContext; -import org.apache.lucene.util.IntsRef; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * A CountingAggregator updates a counter array with the size of the whole - * taxonomy, counting the number of times each category appears in the given set - * of documents. 
- * - * @lucene.experimental - */ -public class CountingAggregator implements Aggregator { - - protected int[] counterArray; - - public CountingAggregator(int[] counterArray) { - this.counterArray = counterArray; - } - - @Override - public void aggregate(int docID, float score, IntsRef ordinals) throws IOException { - for (int i = 0; i < ordinals.length; i++) { - counterArray[ordinals.ints[i]]++; - } - } - - @Override - public boolean equals(Object obj) { - if (obj == null || obj.getClass() != this.getClass()) { - return false; - } - CountingAggregator that = (CountingAggregator) obj; - return that.counterArray == this.counterArray; - } - - @Override - public int hashCode() { - return counterArray == null ? 0 : counterArray.hashCode(); - } - - @Override - public boolean setNextReader(AtomicReaderContext context) throws IOException { - return true; - } - -} Index: lucene/facet/src/java/org/apache/lucene/facet/search/FacetRequest.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/FacetRequest.java (revision 1508077) +++ lucene/facet/src/java/org/apache/lucene/facet/search/FacetRequest.java (working copy) @@ -1,12 +1,9 @@ package org.apache.lucene.facet.search; -import java.io.IOException; - import org.apache.lucene.facet.params.CategoryListParams.OrdinalPolicy; import org.apache.lucene.facet.params.FacetIndexingParams; import org.apache.lucene.facet.range.RangeFacetRequest; import org.apache.lucene.facet.taxonomy.CategoryPath; -import org.apache.lucene.facet.taxonomy.TaxonomyReader; /* * Licensed to the Apache Software Foundation (ASF) under one or more @@ -122,26 +119,6 @@ } /** - * Create an aggregator for this facet request. Aggregator action depends on - * request definition. For a count request, it will usually increment the - * count for that facet. - * - * @param useComplements - * whether the complements optimization is being used for current - * computation. 
- * @param arrays - * provider for facet arrays in use for current computation. - * @param taxonomy - * reader of taxonomy in effect. - * @throws IOException If there is a low-level I/O error. - */ - public Aggregator createAggregator(boolean useComplements, FacetArrays arrays, TaxonomyReader taxonomy) - throws IOException { - throw new UnsupportedOperationException("this FacetRequest does not support this type of Aggregator anymore; " + - "you should override FacetsAccumulator to return the proper FacetsAggregator"); - } - - /** * Returns the {@link FacetsAggregator} which can aggregate the categories of * this facet request. The aggregator is expected to aggregate category values * into {@link FacetArrays}. If the facet request does not support that, e.g. Index: lucene/facet/src/java/org/apache/lucene/facet/search/FacetsAccumulator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/FacetsAccumulator.java (revision 1508077) +++ lucene/facet/src/java/org/apache/lucene/facet/search/FacetsAccumulator.java (working copy) @@ -4,6 +4,7 @@ import java.util.ArrayList; import java.util.List; +import org.apache.lucene.facet.old.OldFacetsAccumulator; import org.apache.lucene.facet.params.FacetIndexingParams; import org.apache.lucene.facet.params.FacetSearchParams; import org.apache.lucene.facet.range.RangeAccumulator; @@ -72,7 +73,7 @@ public static FacetsAccumulator create(FacetSearchParams fsp, IndexReader indexReader, TaxonomyReader taxoReader, FacetArrays arrays) { if (fsp.indexingParams.getPartitionSize() != Integer.MAX_VALUE) { - return new StandardFacetsAccumulator(fsp, indexReader, taxoReader, arrays); + return new OldFacetsAccumulator(fsp, indexReader, taxoReader, arrays); } List rangeRequests = new ArrayList(); Index: lucene/facet/src/java/org/apache/lucene/facet/search/MatchingDocsAsScoredDocIDs.java =================================================================== --- 
lucene/facet/src/java/org/apache/lucene/facet/search/MatchingDocsAsScoredDocIDs.java (revision 1508077) +++ lucene/facet/src/java/org/apache/lucene/facet/search/MatchingDocsAsScoredDocIDs.java (working copy) @@ -1,174 +0,0 @@ -package org.apache.lucene.facet.search; - -import java.io.IOException; -import java.util.Iterator; -import java.util.List; - -import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs; -import org.apache.lucene.search.DocIdSet; -import org.apache.lucene.search.DocIdSetIterator; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * Represents {@link MatchingDocs} as {@link ScoredDocIDs}. 
- * - * @lucene.experimental - */ -public class MatchingDocsAsScoredDocIDs implements ScoredDocIDs { - - // TODO remove this class once we get rid of ScoredDocIDs - - final List matchingDocs; - final int size; - - public MatchingDocsAsScoredDocIDs(List matchingDocs) { - this.matchingDocs = matchingDocs; - int totalSize = 0; - for (MatchingDocs md : matchingDocs) { - totalSize += md.totalHits; - } - this.size = totalSize; - } - - @Override - public ScoredDocIDsIterator iterator() throws IOException { - return new ScoredDocIDsIterator() { - - final Iterator mdIter = matchingDocs.iterator(); - - int scoresIdx = 0; - int doc = 0; - MatchingDocs current; - int currentLength; - boolean done = false; - - @Override - public boolean next() { - if (done) { - return false; - } - - while (current == null) { - if (!mdIter.hasNext()) { - done = true; - return false; - } - current = mdIter.next(); - currentLength = current.bits.length(); - doc = 0; - scoresIdx = 0; - - if (doc >= currentLength || (doc = current.bits.nextSetBit(doc)) == -1) { - current = null; - } else { - doc = -1; // we're calling nextSetBit later on - } - } - - ++doc; - if (doc >= currentLength || (doc = current.bits.nextSetBit(doc)) == -1) { - current = null; - return next(); - } - - return true; - } - - @Override - public float getScore() { - return current.scores == null ? ScoredDocIDsIterator.DEFAULT_SCORE : current.scores[scoresIdx++]; - } - - @Override - public int getDocID() { - return done ? 
DocIdSetIterator.NO_MORE_DOCS : doc + current.context.docBase; - } - }; - } - - @Override - public DocIdSet getDocIDs() { - return new DocIdSet() { - - final Iterator mdIter = matchingDocs.iterator(); - int doc = 0; - MatchingDocs current; - int currentLength; - boolean done = false; - - @Override - public DocIdSetIterator iterator() throws IOException { - return new DocIdSetIterator() { - - @Override - public int nextDoc() throws IOException { - if (done) { - return DocIdSetIterator.NO_MORE_DOCS; - } - - while (current == null) { - if (!mdIter.hasNext()) { - done = true; - return DocIdSetIterator.NO_MORE_DOCS; - } - current = mdIter.next(); - currentLength = current.bits.length(); - doc = 0; - - if (doc >= currentLength || (doc = current.bits.nextSetBit(doc)) == -1) { - current = null; - } else { - doc = -1; // we're calling nextSetBit later on - } - } - - ++doc; - if (doc >= currentLength || (doc = current.bits.nextSetBit(doc)) == -1) { - current = null; - return nextDoc(); - } - - return doc + current.context.docBase; - } - - @Override - public int docID() { - return doc + current.context.docBase; - } - - @Override - public long cost() { - return size; - } - - @Override - public int advance(int target) throws IOException { - throw new UnsupportedOperationException("not supported"); - } - }; - } - }; - } - - @Override - public int size() { - return size; - } - -} \ No newline at end of file Index: lucene/facet/src/java/org/apache/lucene/facet/search/ScoredDocIDs.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/ScoredDocIDs.java (revision 1508077) +++ lucene/facet/src/java/org/apache/lucene/facet/search/ScoredDocIDs.java (working copy) @@ -1,42 +0,0 @@ -package org.apache.lucene.facet.search; - -import java.io.IOException; - -import org.apache.lucene.search.DocIdSet; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * Document IDs with scores for each, driving facets accumulation. Document - * scores are optionally used in the process of facets scoring. - * - * @see StandardFacetsAccumulator#accumulate(ScoredDocIDs) - * @lucene.experimental - */ -public interface ScoredDocIDs { - - /** Returns an iterator over the document IDs and their scores. */ - public ScoredDocIDsIterator iterator() throws IOException; - - /** Returns the set of doc IDs. */ - public DocIdSet getDocIDs(); - - /** Returns the number of scored documents. */ - public int size(); - -} Index: lucene/facet/src/java/org/apache/lucene/facet/search/ScoredDocIDsIterator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/ScoredDocIDsIterator.java (revision 1508077) +++ lucene/facet/src/java/org/apache/lucene/facet/search/ScoredDocIDsIterator.java (working copy) @@ -1,43 +0,0 @@ -package org.apache.lucene.facet.search; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * Iterator over document IDs and their scores. Each {@link #next()} retrieves - * the next docID and its score which can be later be retrieved by - * {@link #getDocID()} and {@link #getScore()}. NOTE: you must call - * {@link #next()} before {@link #getDocID()} and/or {@link #getScore()}, or - * otherwise the returned values are unexpected. - * - * @lucene.experimental - */ -public interface ScoredDocIDsIterator { - - /** Default score used in case scoring is disabled. */ - public static final float DEFAULT_SCORE = 1.0f; - - /** Iterate to the next document/score pair. Returns true iff there is such a pair. */ - public abstract boolean next(); - - /** Returns the ID of the current document. */ - public abstract int getDocID(); - - /** Returns the score of the current document. 
*/ - public abstract float getScore(); - -} \ No newline at end of file Index: lucene/facet/src/java/org/apache/lucene/facet/search/ScoringAggregator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/ScoringAggregator.java (revision 1508077) +++ lucene/facet/src/java/org/apache/lucene/facet/search/ScoringAggregator.java (working copy) @@ -1,67 +0,0 @@ -package org.apache.lucene.facet.search; - -import java.io.IOException; - -import org.apache.lucene.index.AtomicReaderContext; -import org.apache.lucene.util.IntsRef; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * An {@link Aggregator} which updates the weight of a category according to the - * scores of the documents it was found in. - * - * @lucene.experimental - */ -public class ScoringAggregator implements Aggregator { - - private final float[] scoreArray; - private final int hashCode; - - public ScoringAggregator(float[] counterArray) { - this.scoreArray = counterArray; - this.hashCode = scoreArray == null ? 
0 : scoreArray.hashCode(); - } - - @Override - public void aggregate(int docID, float score, IntsRef ordinals) throws IOException { - for (int i = 0; i < ordinals.length; i++) { - scoreArray[ordinals.ints[i]] += score; - } - } - - @Override - public boolean equals(Object obj) { - if (obj == null || obj.getClass() != this.getClass()) { - return false; - } - ScoringAggregator that = (ScoringAggregator) obj; - return that.scoreArray == this.scoreArray; - } - - @Override - public int hashCode() { - return hashCode; - } - - @Override - public boolean setNextReader(AtomicReaderContext context) throws IOException { - return true; - } - -} Index: lucene/facet/src/java/org/apache/lucene/facet/search/StandardFacetsAccumulator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/StandardFacetsAccumulator.java (revision 1508077) +++ lucene/facet/src/java/org/apache/lucene/facet/search/StandardFacetsAccumulator.java (working copy) @@ -1,421 +0,0 @@ -package org.apache.lucene.facet.search; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import java.util.Map.Entry; -import java.util.logging.Level; -import java.util.logging.Logger; - -import org.apache.lucene.facet.complements.TotalFacetCounts; -import org.apache.lucene.facet.complements.TotalFacetCountsCache; -import org.apache.lucene.facet.params.FacetIndexingParams; -import org.apache.lucene.facet.params.FacetSearchParams; -import org.apache.lucene.facet.partitions.IntermediateFacetResult; -import org.apache.lucene.facet.partitions.PartitionsFacetResultsHandler; -import org.apache.lucene.facet.search.FacetRequest.ResultMode; -import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs; -import org.apache.lucene.facet.taxonomy.TaxonomyReader; -import org.apache.lucene.facet.util.PartitionsUtils; -import 
org.apache.lucene.facet.util.ScoredDocIdsUtils; -import org.apache.lucene.index.AtomicReaderContext; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.util.IntsRef; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * Standard implementation for {@link TaxonomyFacetsAccumulator}, utilizing partitions to save on memory. - *

- * Why partitions? Because if there are say 100M categories out of which - * only top K are required, we must first compute value for all 100M categories - * (going over all documents) and only then could we select top K. - * This is made easier on memory by working in partitions of distinct categories: - * Once a values for a partition are found, we take the top K for that - * partition and work on the next partition, them merge the top K of both, - * and so forth, thereby computing top K with RAM needs for the size of - * a single partition rather than for the size of all the 100M categories. - *

- * Decision on partitions size is done at indexing time, and the facet information - * for each partition is maintained separately. - *

- * Implementation detail: Since facets information of each partition is - * maintained in a separate "category list", we can be more efficient - * at search time, because only the facet info for a single partition - * need to be read while processing that partition. - * - * @lucene.experimental - */ -public class StandardFacetsAccumulator extends TaxonomyFacetsAccumulator { - - private static final Logger logger = Logger.getLogger(StandardFacetsAccumulator.class.getName()); - - /** - * Default threshold for using the complements optimization. - * If accumulating facets for a document set larger than this ratio of the index size than - * perform the complement optimization. - * @see #setComplementThreshold(double) for more info on the complements optimization. - */ - public static final double DEFAULT_COMPLEMENT_THRESHOLD = 0.6; - - /** - * Passing this to {@link #setComplementThreshold(double)} will disable using complement optimization. - */ - public static final double DISABLE_COMPLEMENT = Double.POSITIVE_INFINITY; // > 1 actually - - /** - * Passing this to {@link #setComplementThreshold(double)} will force using complement optimization. 
- */ - public static final double FORCE_COMPLEMENT = 0; // <=0 - - protected int partitionSize; - protected int maxPartitions; - protected boolean isUsingComplements; - - private TotalFacetCounts totalFacetCounts; - - private Object accumulateGuard; - - private double complementThreshold = DEFAULT_COMPLEMENT_THRESHOLD; - - private static FacetArrays createFacetArrays(FacetSearchParams searchParams, TaxonomyReader taxoReader) { - return new FacetArrays(PartitionsUtils.partitionSize(searchParams.indexingParams, taxoReader)); - } - - public StandardFacetsAccumulator(FacetSearchParams searchParams, IndexReader indexReader, - TaxonomyReader taxonomyReader) { - this(searchParams, indexReader, taxonomyReader, null); - } - - public StandardFacetsAccumulator(FacetSearchParams searchParams, IndexReader indexReader, - TaxonomyReader taxonomyReader, FacetArrays facetArrays) { - super(searchParams, indexReader, taxonomyReader, facetArrays == null ? createFacetArrays(searchParams, taxonomyReader) : facetArrays); - - // can only be computed later when docids size is known - isUsingComplements = false; - partitionSize = PartitionsUtils.partitionSize(searchParams.indexingParams, taxonomyReader); - maxPartitions = (int) Math.ceil(this.taxonomyReader.getSize() / (double) partitionSize); - accumulateGuard = new Object(); - } - - // TODO: this should be removed once we clean the API - public List accumulate(ScoredDocIDs docids) throws IOException { - - // synchronize to prevent calling two accumulate()'s at the same time. - // We decided not to synchronize the method because that might mislead - // users to feel encouraged to call this method simultaneously. 
- synchronized (accumulateGuard) { - - // only now we can compute this - isUsingComplements = shouldComplement(docids); - - if (isUsingComplements) { - try { - totalFacetCounts = TotalFacetCountsCache.getSingleton().getTotalCounts(indexReader, taxonomyReader, searchParams.indexingParams); - if (totalFacetCounts != null) { - docids = ScoredDocIdsUtils.getComplementSet(docids, indexReader); - } else { - isUsingComplements = false; - } - } catch (UnsupportedOperationException e) { - // TODO (Facet): this exception is thrown from TotalCountsKey if the - // IndexReader used does not support getVersion(). We should re-think - // this: is this tiny detail worth disabling total counts completely - // for such readers? Currently, it's not supported by Parallel and - // MultiReader, which might be problematic for several applications. - // We could, for example, base our "isCurrent" logic on something else - // than the reader's version. Need to think more deeply about it. - if (logger.isLoggable(Level.FINEST)) { - logger.log(Level.FINEST, "IndexReader used does not support completents: ", e); - } - isUsingComplements = false; - } catch (IOException e) { - if (logger.isLoggable(Level.FINEST)) { - logger.log(Level.FINEST, "Failed to load/calculate total counts (complement counting disabled): ", e); - } - // silently fail if for some reason failed to load/save from/to dir - isUsingComplements = false; - } catch (Exception e) { - // give up: this should not happen! - throw new IOException("PANIC: Got unexpected exception while trying to get/calculate total counts", e); - } - } - - docids = actualDocsToAccumulate(docids); - - HashMap fr2tmpRes = new HashMap(); - - try { - for (int part = 0; part < maxPartitions; part++) { - - // fill arrays from category lists - fillArraysForPartition(docids, facetArrays, part); - - int offset = part * partitionSize; - - // for each partition we go over all requests and handle - // each, where the request maintains the merged result. 
- // In this implementation merges happen after each partition, - // but other impl could merge only at the end. - final HashSet handledRequests = new HashSet(); - for (FacetRequest fr : searchParams.facetRequests) { - // Handle and merge only facet requests which were not already handled. - if (handledRequests.add(fr)) { - PartitionsFacetResultsHandler frHndlr = createFacetResultsHandler(fr); - IntermediateFacetResult res4fr = frHndlr.fetchPartitionResult(offset); - IntermediateFacetResult oldRes = fr2tmpRes.get(fr); - if (oldRes != null) { - res4fr = frHndlr.mergeResults(oldRes, res4fr); - } - fr2tmpRes.put(fr, res4fr); - } - } - } - } finally { - facetArrays.free(); - } - - // gather results from all requests into a list for returning them - List res = new ArrayList(); - for (FacetRequest fr : searchParams.facetRequests) { - PartitionsFacetResultsHandler frHndlr = createFacetResultsHandler(fr); - IntermediateFacetResult tmpResult = fr2tmpRes.get(fr); - if (tmpResult == null) { - // Add empty FacetResult: - res.add(emptyResult(taxonomyReader.getOrdinal(fr.categoryPath), fr)); - continue; - } - FacetResult facetRes = frHndlr.renderFacetResult(tmpResult); - // final labeling if allowed (because labeling is a costly operation) - frHndlr.labelResult(facetRes); - res.add(facetRes); - } - - return res; - } - } - - /** check if all requests are complementable */ - protected boolean mayComplement() { - for (FacetRequest freq : searchParams.facetRequests) { - if (!(freq instanceof CountFacetRequest)) { - return false; - } - } - return true; - } - - @Override - protected PartitionsFacetResultsHandler createFacetResultsHandler(FacetRequest fr) { - if (fr.getResultMode() == ResultMode.PER_NODE_IN_TREE) { - return new TopKInEachNodeHandler(taxonomyReader, fr, facetArrays); - } else { - return new TopKFacetResultsHandler(taxonomyReader, fr, facetArrays); - } - } - - /** - * Set the actual set of documents over which accumulation should take place. - *

- * Allows to override the set of documents to accumulate for. Invoked just - * before actual accumulating starts. From this point that set of documents - * remains unmodified. Default implementation just returns the input - * unchanged. - * - * @param docids - * candidate documents to accumulate for - * @return actual documents to accumulate for - */ - protected ScoredDocIDs actualDocsToAccumulate(ScoredDocIDs docids) throws IOException { - return docids; - } - - /** Check if it is worth to use complements */ - protected boolean shouldComplement(ScoredDocIDs docids) { - return mayComplement() && (docids.size() > indexReader.numDocs() * getComplementThreshold()) ; - } - - /** - * Iterate over the documents for this partition and fill the facet arrays with the correct - * count/complement count/value. - */ - private final void fillArraysForPartition(ScoredDocIDs docids, FacetArrays facetArrays, int partition) - throws IOException { - - if (isUsingComplements) { - initArraysByTotalCounts(facetArrays, partition, docids.size()); - } else { - facetArrays.free(); // to get a cleared array for this partition - } - - HashMap categoryLists = getCategoryListMap(facetArrays, partition); - - IntsRef ordinals = new IntsRef(32); // a reasonable start capacity for most common apps - for (Entry entry : categoryLists.entrySet()) { - final ScoredDocIDsIterator iterator = docids.iterator(); - final CategoryListIterator categoryListIter = entry.getKey(); - final Aggregator aggregator = entry.getValue(); - Iterator contexts = indexReader.leaves().iterator(); - AtomicReaderContext current = null; - int maxDoc = -1; - while (iterator.next()) { - int docID = iterator.getDocID(); - if (docID >= maxDoc) { - boolean iteratorDone = false; - do { // find the segment which contains this document - if (!contexts.hasNext()) { - throw new RuntimeException("ScoredDocIDs contains documents outside this reader's segments !?"); - } - current = contexts.next(); - maxDoc = current.docBase + 
current.reader().maxDoc(); - if (docID < maxDoc) { // segment has docs, check if it has categories - boolean validSegment = categoryListIter.setNextReader(current); - validSegment &= aggregator.setNextReader(current); - if (!validSegment) { // if categoryList or aggregtor say it's an invalid segment, skip all docs - while (docID < maxDoc && iterator.next()) { - docID = iterator.getDocID(); - } - if (docID < maxDoc) { - iteratorDone = true; - } - } - } - } while (docID >= maxDoc); - if (iteratorDone) { // iterator finished, terminate the loop - break; - } - } - docID -= current.docBase; - categoryListIter.getOrdinals(docID, ordinals); - if (ordinals.length == 0) { - continue; // document does not have category ordinals - } - aggregator.aggregate(docID, iterator.getScore(), ordinals); - } - } - } - - /** Init arrays for partition by total counts, optionally applying a factor */ - private final void initArraysByTotalCounts(FacetArrays facetArrays, int partition, int nAccumulatedDocs) { - int[] intArray = facetArrays.getIntArray(); - totalFacetCounts.fillTotalCountsForPartition(intArray, partition); - double totalCountsFactor = getTotalCountsFactor(); - // fix total counts, but only if the effect of this would be meaningful. - if (totalCountsFactor < 0.99999) { - int delta = nAccumulatedDocs + 1; - for (int i = 0; i < intArray.length; i++) { - intArray[i] *= totalCountsFactor; - // also translate to prevent loss of non-positive values - // due to complement sampling (ie if sampled docs all decremented a certain category). - intArray[i] += delta; - } - } - } - - /** - * Expert: factor by which counts should be multiplied when initializing - * the count arrays from total counts. - * Default implementation for this returns 1, which is a no op. 
- * @return a factor by which total counts should be multiplied - */ - protected double getTotalCountsFactor() { - return 1; - } - - /** - * Create an {@link Aggregator} and a {@link CategoryListIterator} for each - * and every {@link FacetRequest}. Generating a map, matching each - * categoryListIterator to its matching aggregator. - *

- * If two CategoryListIterators are served by the same aggregator, a single - * aggregator is returned for both. - * - * NOTE: If a given category list iterator is needed with two different - * aggregators (e.g counting and association) - an exception is thrown as this - * functionality is not supported at this time. - */ - protected HashMap getCategoryListMap(FacetArrays facetArrays, - int partition) throws IOException { - - HashMap categoryLists = new HashMap(); - - FacetIndexingParams indexingParams = searchParams.indexingParams; - for (FacetRequest facetRequest : searchParams.facetRequests) { - Aggregator categoryAggregator = facetRequest.createAggregator(isUsingComplements, facetArrays, taxonomyReader); - - CategoryListIterator cli = indexingParams.getCategoryListParams(facetRequest.categoryPath).createCategoryListIterator(partition); - - // get the aggregator - Aggregator old = categoryLists.put(cli, categoryAggregator); - - if (old != null && !old.equals(categoryAggregator)) { - throw new RuntimeException("Overriding existing category list with different aggregator"); - } - // if the aggregator is the same we're covered - } - - return categoryLists; - } - - @Override - public List accumulate(List matchingDocs) throws IOException { - return accumulate(new MatchingDocsAsScoredDocIDs(matchingDocs)); - } - - /** - * Returns the complement threshold. - * @see #setComplementThreshold(double) - */ - public double getComplementThreshold() { - return complementThreshold; - } - - /** - * Set the complement threshold. - * This threshold will dictate whether the complements optimization is applied. - * The optimization is to count for less documents. It is useful when the same - * FacetSearchParams are used for varying sets of documents. The first time - * complements is used the "total counts" are computed - counting for all the - * documents in the collection. 
Then, only the complementing set of documents - * is considered, and used to decrement from the overall counts, thereby - * walking through less documents, which is faster. - *

- * For the default settings see {@link #DEFAULT_COMPLEMENT_THRESHOLD}. - *

- * To forcing complements in all cases pass {@link #FORCE_COMPLEMENT}. - * This is mostly useful for testing purposes, as forcing complements when only - * tiny fraction of available documents match the query does not make sense and - * would incur performance degradations. - *

- * To disable complements pass {@link #DISABLE_COMPLEMENT}. - * @param complementThreshold the complement threshold to set - * @see #getComplementThreshold() - */ - public void setComplementThreshold(double complementThreshold) { - this.complementThreshold = complementThreshold; - } - - /** Returns true if complements are enabled. */ - public boolean isUsingComplements() { - return isUsingComplements; - } - -} Index: lucene/facet/src/java/org/apache/lucene/facet/search/SumScoreFacetRequest.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/SumScoreFacetRequest.java (revision 1508077) +++ lucene/facet/src/java/org/apache/lucene/facet/search/SumScoreFacetRequest.java (working copy) @@ -2,7 +2,6 @@ import org.apache.lucene.facet.params.FacetIndexingParams; import org.apache.lucene.facet.taxonomy.CategoryPath; -import org.apache.lucene.facet.taxonomy.TaxonomyReader; /* * Licensed to the Apache Software Foundation (ASF) under one or more @@ -40,12 +39,6 @@ } @Override - public Aggregator createAggregator(boolean useComplements, FacetArrays arrays, TaxonomyReader taxonomy) { - assert !useComplements : "complements are not supported by this FacetRequest"; - return new ScoringAggregator(arrays.getFloatArray()); - } - - @Override public double getValueOf(FacetArrays arrays, int ordinal) { return arrays.getFloatArray()[ordinal]; } Index: lucene/facet/src/java/org/apache/lucene/facet/util/ScoredDocIdsUtils.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/util/ScoredDocIdsUtils.java (revision 1508077) +++ lucene/facet/src/java/org/apache/lucene/facet/util/ScoredDocIdsUtils.java (working copy) @@ -1,446 +0,0 @@ -package org.apache.lucene.facet.util; - -import java.io.IOException; -import java.util.Arrays; - -import org.apache.lucene.facet.search.ScoredDocIDs; -import org.apache.lucene.facet.search.ScoredDocIDsIterator; -import 
org.apache.lucene.index.AtomicReader; -import org.apache.lucene.index.AtomicReaderContext; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.MultiFields; -import org.apache.lucene.search.DocIdSet; -import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.util.Bits; -import org.apache.lucene.util.FixedBitSet; -import org.apache.lucene.util.OpenBitSetDISI; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * Utility methods for Scored Doc IDs. - * - * @lucene.experimental - */ -public class ScoredDocIdsUtils { - - /** - * Create a complement of the input set. The returned {@link ScoredDocIDs} - * does not contain any scores, which makes sense given that the complementing - * documents were not scored. - * - * Note: the complement set does NOT contain doc ids which are noted as deleted by the given reader - * - * @param docids to be complemented. - * @param reader holding the number of documents & information about deletions. 
- */ - public final static ScoredDocIDs getComplementSet(final ScoredDocIDs docids, final IndexReader reader) - throws IOException { - final int maxDoc = reader.maxDoc(); - - DocIdSet docIdSet = docids.getDocIDs(); - final FixedBitSet complement; - if (docIdSet instanceof FixedBitSet) { - // That is the most common case, if ScoredDocIdsCollector was used. - complement = ((FixedBitSet) docIdSet).clone(); - } else { - complement = new FixedBitSet(maxDoc); - DocIdSetIterator iter = docIdSet.iterator(); - int doc; - while ((doc = iter.nextDoc()) < maxDoc) { - complement.set(doc); - } - } - complement.flip(0, maxDoc); - clearDeleted(reader, complement); - - return createScoredDocIds(complement, maxDoc); - } - - /** Clear all deleted documents from a given open-bit-set according to a given reader */ - private static void clearDeleted(final IndexReader reader, final FixedBitSet set) throws IOException { - // TODO use BitsFilteredDocIdSet? - - // If there are no deleted docs - if (!reader.hasDeletions()) { - return; // return immediately - } - - DocIdSetIterator it = set.iterator(); - int doc = it.nextDoc(); - for (AtomicReaderContext context : reader.leaves()) { - AtomicReader r = context.reader(); - final int maxDoc = r.maxDoc() + context.docBase; - if (doc >= maxDoc) { // skip this segment - continue; - } - if (!r.hasDeletions()) { // skip all docs that belong to this reader as it has no deletions - while ((doc = it.nextDoc()) < maxDoc) {} - continue; - } - Bits liveDocs = r.getLiveDocs(); - do { - if (!liveDocs.get(doc - context.docBase)) { - set.clear(doc); - } - } while ((doc = it.nextDoc()) < maxDoc); - } - } - - /** - * Create a subset of an existing ScoredDocIDs object. - * - * @param allDocIds orginal set - * @param sampleSet Doc Ids of the subset. 
- */ - public static final ScoredDocIDs createScoredDocIDsSubset(final ScoredDocIDs allDocIds, - final int[] sampleSet) throws IOException { - - // sort so that we can scan docs in order - final int[] docids = sampleSet; - Arrays.sort(docids); - final float[] scores = new float[docids.length]; - // fetch scores and compute size - ScoredDocIDsIterator it = allDocIds.iterator(); - int n = 0; - while (it.next() && n < docids.length) { - int doc = it.getDocID(); - if (doc == docids[n]) { - scores[n] = it.getScore(); - ++n; - } - } - final int size = n; - - return new ScoredDocIDs() { - - @Override - public DocIdSet getDocIDs() { - return new DocIdSet() { - - @Override - public boolean isCacheable() { return true; } - - @Override - public DocIdSetIterator iterator() { - return new DocIdSetIterator() { - - private int next = -1; - - @Override - public int advance(int target) { - while (next < size && docids[next++] < target) { - } - return next == size ? NO_MORE_DOCS : docids[next]; - } - - @Override - public int docID() { - return docids[next]; - } - - @Override - public int nextDoc() { - if (++next >= size) { - return NO_MORE_DOCS; - } - return docids[next]; - } - - @Override - public long cost() { - return size; - } - }; - } - }; - } - - @Override - public ScoredDocIDsIterator iterator() { - return new ScoredDocIDsIterator() { - - int next = -1; - - @Override - public boolean next() { return ++next < size; } - - @Override - public float getScore() { return scores[next]; } - - @Override - public int getDocID() { return docids[next]; } - }; - } - - @Override - public int size() { return size; } - - }; - } - - /** - * Creates a {@link ScoredDocIDs} which returns document IDs all non-deleted doc ids - * according to the given reader. - * The returned set contains the range of [0 .. 
reader.maxDoc ) doc ids - */ - public static final ScoredDocIDs createAllDocsScoredDocIDs (final IndexReader reader) { - if (reader.hasDeletions()) { - return new AllLiveDocsScoredDocIDs(reader); - } - return new AllDocsScoredDocIDs(reader); - } - - /** - * Create a ScoredDocIDs out of a given docIdSet and the total number of documents in an index - */ - public static final ScoredDocIDs createScoredDocIds(final DocIdSet docIdSet, final int maxDoc) { - return new ScoredDocIDs() { - private int size = -1; - @Override - public DocIdSet getDocIDs() { return docIdSet; } - - @Override - public ScoredDocIDsIterator iterator() throws IOException { - final DocIdSetIterator docIterator = docIdSet.iterator(); - return new ScoredDocIDsIterator() { - @Override - public boolean next() { - try { - return docIterator.nextDoc() != DocIdSetIterator.NO_MORE_DOCS; - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - @Override - public float getScore() { return DEFAULT_SCORE; } - - @Override - public int getDocID() { return docIterator.docID(); } - }; - } - - @Override - public int size() { - // lazy size computation - if (size < 0) { - OpenBitSetDISI openBitSetDISI; - try { - openBitSetDISI = new OpenBitSetDISI(docIdSet.iterator(), maxDoc); - } catch (IOException e) { - throw new RuntimeException(e); - } - size = (int) openBitSetDISI.cardinality(); - } - return size; - } - }; - } - - /** - * All docs ScoredDocsIDs - this one is simply an 'all 1' bitset. 
Used when - * there are no deletions in the index and we wish to go through each and - * every document - */ - private static class AllDocsScoredDocIDs implements ScoredDocIDs { - final int maxDoc; - - public AllDocsScoredDocIDs(IndexReader reader) { - this.maxDoc = reader.maxDoc(); - } - - @Override - public int size() { - return maxDoc; - } - - @Override - public DocIdSet getDocIDs() { - return new DocIdSet() { - - @Override - public boolean isCacheable() { - return true; - } - - @Override - public DocIdSetIterator iterator() { - return new DocIdSetIterator() { - private int next = -1; - - @Override - public int advance(int target) { - if (target <= next) { - target = next + 1; - } - return next = target >= maxDoc ? NO_MORE_DOCS : target; - } - - @Override - public int docID() { - return next; - } - - @Override - public int nextDoc() { - return ++next < maxDoc ? next : NO_MORE_DOCS; - } - - @Override - public long cost() { - return maxDoc; - } - }; - } - }; - } - - @Override - public ScoredDocIDsIterator iterator() { - try { - final DocIdSetIterator iter = getDocIDs().iterator(); - return new ScoredDocIDsIterator() { - @Override - public boolean next() { - try { - return iter.nextDoc() != DocIdSetIterator.NO_MORE_DOCS; - } catch (IOException e) { - // cannot happen - return false; - } - } - - @Override - public float getScore() { - return DEFAULT_SCORE; - } - - @Override - public int getDocID() { - return iter.docID(); - } - }; - } catch (IOException e) { - // cannot happen - throw new RuntimeException(e); - } - } - } - - /** - * An All-docs bitset which has '0' for deleted documents and '1' for the - * rest. Useful for iterating over all 'live' documents in a given index. - *

- * NOTE: this class would work for indexes with no deletions at all, - * although it is recommended to use {@link AllDocsScoredDocIDs} to ease - * the performance cost of validating isDeleted() on each and every docId - */ - private static final class AllLiveDocsScoredDocIDs implements ScoredDocIDs { - final int maxDoc; - final IndexReader reader; - - AllLiveDocsScoredDocIDs(IndexReader reader) { - this.maxDoc = reader.maxDoc(); - this.reader = reader; - } - - @Override - public int size() { - return reader.numDocs(); - } - - @Override - public DocIdSet getDocIDs() { - return new DocIdSet() { - - @Override - public boolean isCacheable() { - return true; - } - - @Override - public DocIdSetIterator iterator() { - return new DocIdSetIterator() { - final Bits liveDocs = MultiFields.getLiveDocs(reader); - private int next = -1; - - @Override - public int advance(int target) { - if (target > next) { - next = target - 1; - } - return nextDoc(); - } - - @Override - public int docID() { - return next; - } - - @Override - public int nextDoc() { - do { - ++next; - } while (next < maxDoc && liveDocs != null && !liveDocs.get(next)); - - return next < maxDoc ? 
next : NO_MORE_DOCS; - } - - @Override - public long cost() { - return maxDoc; - } - }; - } - }; - } - - @Override - public ScoredDocIDsIterator iterator() { - try { - final DocIdSetIterator iter = getDocIDs().iterator(); - return new ScoredDocIDsIterator() { - @Override - public boolean next() { - try { - return iter.nextDoc() != DocIdSetIterator.NO_MORE_DOCS; - } catch (IOException e) { - // cannot happen - return false; - } - } - - @Override - public float getScore() { - return DEFAULT_SCORE; - } - - @Override - public int getDocID() { - return iter.docID(); - } - }; - } catch (IOException e) { - // cannot happen - throw new RuntimeException(e); - } - } - } - -} \ No newline at end of file Index: lucene/facet/src/test/org/apache/lucene/facet/complements/TestFacetsAccumulatorWithComplement.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/complements/TestFacetsAccumulatorWithComplement.java (revision 1508077) +++ lucene/facet/src/test/org/apache/lucene/facet/complements/TestFacetsAccumulatorWithComplement.java (working copy) @@ -4,6 +4,7 @@ import java.util.List; import org.apache.lucene.facet.FacetTestBase; +import org.apache.lucene.facet.old.OldFacetsAccumulator; import org.apache.lucene.facet.params.FacetIndexingParams; import org.apache.lucene.facet.params.FacetSearchParams; import org.apache.lucene.facet.search.CountFacetRequest; @@ -10,7 +11,6 @@ import org.apache.lucene.facet.search.FacetResult; import org.apache.lucene.facet.search.FacetResultNode; import org.apache.lucene.facet.search.FacetsCollector; -import org.apache.lucene.facet.search.StandardFacetsAccumulator; import org.apache.lucene.facet.taxonomy.CategoryPath; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.MultiReader; @@ -112,8 +112,8 @@ /** compute facets with certain facet requests and docs */ private List findFacets(boolean withComplement) throws IOException { FacetSearchParams fsp = new 
FacetSearchParams(fip, new CountFacetRequest(new CategoryPath("root","a"), 10)); - StandardFacetsAccumulator sfa = new StandardFacetsAccumulator(fsp, indexReader, taxoReader); - sfa.setComplementThreshold(withComplement ? StandardFacetsAccumulator.FORCE_COMPLEMENT : StandardFacetsAccumulator.DISABLE_COMPLEMENT); + OldFacetsAccumulator sfa = new OldFacetsAccumulator(fsp, indexReader, taxoReader); + sfa.setComplementThreshold(withComplement ? OldFacetsAccumulator.FORCE_COMPLEMENT : OldFacetsAccumulator.DISABLE_COMPLEMENT); FacetsCollector fc = FacetsCollector.create(sfa); searcher.search(new MatchAllDocsQuery(), fc); Index: lucene/facet/src/test/org/apache/lucene/facet/old/AdaptiveAccumulatorTest.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/old/AdaptiveAccumulatorTest.java (revision 0) +++ lucene/facet/src/test/org/apache/lucene/facet/old/AdaptiveAccumulatorTest.java (working copy) @@ -0,0 +1,38 @@ +package org.apache.lucene.facet.old; + +import org.apache.lucene.facet.params.FacetSearchParams; +import org.apache.lucene.facet.sampling.BaseSampleTestTopK; +import org.apache.lucene.facet.sampling.Sampler; +import org.apache.lucene.facet.taxonomy.TaxonomyReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.util.LuceneTestCase.Slow; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +@Slow +public class AdaptiveAccumulatorTest extends BaseSampleTestTopK { + + @Override + protected OldFacetsAccumulator getSamplingAccumulator(Sampler sampler, TaxonomyReader taxoReader, + IndexReader indexReader, FacetSearchParams searchParams) { + AdaptiveFacetsAccumulator res = new AdaptiveFacetsAccumulator(searchParams, indexReader, taxoReader); + res.setSampler(sampler); + return res; + } + +} Property changes on: lucene/facet/src/test/org/apache/lucene/facet/old/AdaptiveAccumulatorTest.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/facet/src/test/org/apache/lucene/facet/old/TestScoredDocIDsUtils.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/old/TestScoredDocIDsUtils.java (revision 0) +++ lucene/facet/src/test/org/apache/lucene/facet/old/TestScoredDocIDsUtils.java (working copy) @@ -0,0 +1,154 @@ +package org.apache.lucene.facet.old; + +import java.io.IOException; +import java.util.Random; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.MockTokenizer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.StringField; +import org.apache.lucene.facet.FacetTestCase; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import 
org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.DocIdSet; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.FixedBitSet; +import org.junit.Test; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +public class TestScoredDocIDsUtils extends FacetTestCase { + + @Test + public void testComplementIterator() throws Exception { + final int n = atLeast(10000); + final FixedBitSet bits = new FixedBitSet(n); + Random random = random(); + for (int i = 0; i < n; i++) { + int idx = random.nextInt(n); + bits.flip(idx, idx + 1); + } + + FixedBitSet verify = new FixedBitSet(bits); + + ScoredDocIDs scoredDocIDs = ScoredDocIdsUtils.createScoredDocIds(bits, n); + + Directory dir = newDirectory(); + IndexReader reader = createReaderWithNDocs(random, n, dir); + try { + assertEquals(n - verify.cardinality(), ScoredDocIdsUtils.getComplementSet(scoredDocIDs, reader).size()); + } finally { + reader.close(); + dir.close(); + } + } + + @Test + public void testAllDocs() throws Exception { + int maxDoc = 3; + Directory dir = newDirectory(); + IndexReader reader = createReaderWithNDocs(random(), maxDoc, dir); + try { + ScoredDocIDs all = ScoredDocIdsUtils.createAllDocsScoredDocIDs(reader); + assertEquals("invalid size", maxDoc, all.size()); + ScoredDocIDsIterator iter = all.iterator(); + int doc = 0; + while (iter.next()) { + assertEquals("invalid doc ID: " + iter.getDocID(), doc++, iter.getDocID()); + assertEquals("invalid score: " + iter.getScore(), ScoredDocIDsIterator.DEFAULT_SCORE, iter.getScore(), 0.0f); + } + assertEquals("invalid maxDoc: " + doc, maxDoc, doc); + + DocIdSet docIDs = all.getDocIDs(); + assertTrue("should be cacheable", docIDs.isCacheable()); + DocIdSetIterator docIDsIter = docIDs.iterator(); + assertEquals("nextDoc() hasn't been called yet", -1, docIDsIter.docID()); + assertEquals(0, docIDsIter.nextDoc()); + assertEquals(1, docIDsIter.advance(1)); + // if advance is smaller than current doc, advance to cur+1. 
+ assertEquals(2, docIDsIter.advance(0)); + } finally { + reader.close(); + dir.close(); + } + } + + /** + * Creates an index with n documents, this method is meant for testing purposes ONLY + */ + static IndexReader createReaderWithNDocs(Random random, int nDocs, Directory directory) throws IOException { + return createReaderWithNDocs(random, nDocs, new DocumentFactory(nDocs), directory); + } + + private static class DocumentFactory { + protected final static String field = "content"; + protected final static String delTxt = "delete"; + protected final static String alphaTxt = "alpha"; + + private final static Field deletionMark = new StringField(field, delTxt, Field.Store.NO); + private final static Field alphaContent = new StringField(field, alphaTxt, Field.Store.NO); + + public DocumentFactory(int totalNumDocs) { + } + + public boolean markedDeleted(int docNum) { + return false; + } + + public Document getDoc(int docNum) { + Document doc = new Document(); + if (markedDeleted(docNum)) { + doc.add(deletionMark); + // Add a special field for docs that are marked for deletion. Later we + // assert that those docs are not returned by all-scored-doc-IDs. 
+ FieldType ft = new FieldType(); + ft.setStored(true); + doc.add(new Field("del", Integer.toString(docNum), ft)); + } + + if (haveAlpha(docNum)) { + doc.add(alphaContent); + } + return doc; + } + + public boolean haveAlpha(int docNum) { + return false; + } + } + + static IndexReader createReaderWithNDocs(Random random, int nDocs, DocumentFactory docFactory, Directory dir) throws IOException { + RandomIndexWriter writer = new RandomIndexWriter(random, dir, + newIndexWriterConfig(random, TEST_VERSION_CURRENT, + new MockAnalyzer(random, MockTokenizer.KEYWORD, false))); + for (int docNum = 0; docNum < nDocs; docNum++) { + writer.addDocument(docFactory.getDoc(docNum)); + } + // Delete documents marked for deletion + writer.deleteDocuments(new Term(DocumentFactory.field, DocumentFactory.delTxt)); + writer.close(); + + // Open a fresh read-only reader with the deletions in place + return DirectoryReader.open(dir); + } +} Property changes on: lucene/facet/src/test/org/apache/lucene/facet/old/TestScoredDocIDsUtils.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/facet/src/test/org/apache/lucene/facet/sampling/BaseSampleTestTopK.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/sampling/BaseSampleTestTopK.java (revision 1508077) +++ lucene/facet/src/test/org/apache/lucene/facet/sampling/BaseSampleTestTopK.java (working copy) @@ -3,18 +3,14 @@ import java.util.List; import java.util.Random; +import org.apache.lucene.facet.old.OldFacetsAccumulator; import org.apache.lucene.facet.params.FacetIndexingParams; import org.apache.lucene.facet.params.FacetSearchParams; -import org.apache.lucene.facet.sampling.RandomSampler; -import org.apache.lucene.facet.sampling.RepeatableSampler; -import org.apache.lucene.facet.sampling.Sampler; -import org.apache.lucene.facet.sampling.SamplingParams; import 
org.apache.lucene.facet.search.BaseTestTopK; import org.apache.lucene.facet.search.FacetRequest; +import org.apache.lucene.facet.search.FacetRequest.ResultMode; import org.apache.lucene.facet.search.FacetResult; import org.apache.lucene.facet.search.FacetsCollector; -import org.apache.lucene.facet.search.StandardFacetsAccumulator; -import org.apache.lucene.facet.search.FacetRequest.ResultMode; import org.apache.lucene.facet.taxonomy.TaxonomyReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; @@ -60,7 +56,7 @@ return res; } - protected abstract StandardFacetsAccumulator getSamplingAccumulator(Sampler sampler, TaxonomyReader taxoReader, + protected abstract OldFacetsAccumulator getSamplingAccumulator(Sampler sampler, TaxonomyReader taxoReader, IndexReader indexReader, FacetSearchParams searchParams); /** @@ -123,8 +119,8 @@ private FacetsCollector samplingCollector(final boolean complement, final Sampler sampler, FacetSearchParams samplingSearchParams) { - StandardFacetsAccumulator sfa = getSamplingAccumulator(sampler, taxoReader, indexReader, samplingSearchParams); - sfa.setComplementThreshold(complement ? StandardFacetsAccumulator.FORCE_COMPLEMENT : StandardFacetsAccumulator.DISABLE_COMPLEMENT); + OldFacetsAccumulator sfa = getSamplingAccumulator(sampler, taxoReader, indexReader, samplingSearchParams); + sfa.setComplementThreshold(complement ? 
OldFacetsAccumulator.FORCE_COMPLEMENT : OldFacetsAccumulator.DISABLE_COMPLEMENT); return FacetsCollector.create(sfa); } Index: lucene/facet/src/test/org/apache/lucene/facet/sampling/OversampleWithDepthTest.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/sampling/OversampleWithDepthTest.java (revision 1508077) +++ lucene/facet/src/test/org/apache/lucene/facet/sampling/OversampleWithDepthTest.java (working copy) @@ -6,19 +6,15 @@ import org.apache.lucene.document.Document; import org.apache.lucene.facet.FacetTestCase; import org.apache.lucene.facet.index.FacetFields; +import org.apache.lucene.facet.old.OldFacetsAccumulator; import org.apache.lucene.facet.params.FacetIndexingParams; import org.apache.lucene.facet.params.FacetSearchParams; -import org.apache.lucene.facet.sampling.RandomSampler; -import org.apache.lucene.facet.sampling.Sampler; -import org.apache.lucene.facet.sampling.SamplingAccumulator; -import org.apache.lucene.facet.sampling.SamplingParams; import org.apache.lucene.facet.search.CountFacetRequest; import org.apache.lucene.facet.search.FacetRequest; +import org.apache.lucene.facet.search.FacetRequest.ResultMode; import org.apache.lucene.facet.search.FacetResult; import org.apache.lucene.facet.search.FacetResultNode; import org.apache.lucene.facet.search.FacetsCollector; -import org.apache.lucene.facet.search.StandardFacetsAccumulator; -import org.apache.lucene.facet.search.FacetRequest.ResultMode; import org.apache.lucene.facet.taxonomy.CategoryPath; import org.apache.lucene.facet.taxonomy.TaxonomyReader; import org.apache.lucene.facet.taxonomy.TaxonomyWriter; @@ -116,7 +112,7 @@ final SamplingParams params) throws IOException { // a FacetsCollector with a sampling accumulator Sampler sampler = new RandomSampler(params, random()); - StandardFacetsAccumulator sfa = new SamplingAccumulator(sampler, fsp, r, tr); + OldFacetsAccumulator sfa = new SamplingAccumulator(sampler, fsp, r, 
tr); FacetsCollector fcWithSampling = FacetsCollector.create(sfa); IndexSearcher s = newSearcher(r); Index: lucene/facet/src/test/org/apache/lucene/facet/sampling/SamplerTest.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/sampling/SamplerTest.java (revision 1508077) +++ lucene/facet/src/test/org/apache/lucene/facet/sampling/SamplerTest.java (working copy) @@ -4,12 +4,12 @@ import java.util.List; import org.apache.lucene.facet.FacetTestBase; +import org.apache.lucene.facet.old.OldFacetsAccumulator; import org.apache.lucene.facet.params.FacetIndexingParams; import org.apache.lucene.facet.params.FacetSearchParams; import org.apache.lucene.facet.search.CountFacetRequest; import org.apache.lucene.facet.search.FacetResultNode; import org.apache.lucene.facet.search.FacetsCollector; -import org.apache.lucene.facet.search.StandardFacetsAccumulator; import org.apache.lucene.facet.taxonomy.CategoryPath; import org.apache.lucene.search.MatchAllDocsQuery; import org.junit.After; @@ -99,7 +99,7 @@ // Make sure no complements are in action accumulator - .setComplementThreshold(StandardFacetsAccumulator.DISABLE_COMPLEMENT); + .setComplementThreshold(OldFacetsAccumulator.DISABLE_COMPLEMENT); FacetsCollector fc = FacetsCollector.create(accumulator); Index: lucene/facet/src/test/org/apache/lucene/facet/sampling/SamplingAccumulatorTest.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/sampling/SamplingAccumulatorTest.java (revision 1508077) +++ lucene/facet/src/test/org/apache/lucene/facet/sampling/SamplingAccumulatorTest.java (working copy) @@ -1,9 +1,7 @@ package org.apache.lucene.facet.sampling; +import org.apache.lucene.facet.old.OldFacetsAccumulator; import org.apache.lucene.facet.params.FacetSearchParams; -import org.apache.lucene.facet.sampling.Sampler; -import org.apache.lucene.facet.sampling.SamplingAccumulator; -import 
org.apache.lucene.facet.search.StandardFacetsAccumulator; import org.apache.lucene.facet.taxonomy.TaxonomyReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.util.LuceneTestCase.Slow; @@ -29,7 +27,7 @@ public class SamplingAccumulatorTest extends BaseSampleTestTopK { @Override - protected StandardFacetsAccumulator getSamplingAccumulator(Sampler sampler, TaxonomyReader taxoReader, + protected OldFacetsAccumulator getSamplingAccumulator(Sampler sampler, TaxonomyReader taxoReader, IndexReader indexReader, FacetSearchParams searchParams) { return new SamplingAccumulator(sampler, searchParams, indexReader, taxoReader); } Index: lucene/facet/src/test/org/apache/lucene/facet/sampling/SamplingWrapperTest.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/sampling/SamplingWrapperTest.java (revision 1508077) +++ lucene/facet/src/test/org/apache/lucene/facet/sampling/SamplingWrapperTest.java (working copy) @@ -1,14 +1,11 @@ package org.apache.lucene.facet.sampling; +import org.apache.lucene.facet.old.OldFacetsAccumulator; +import org.apache.lucene.facet.params.FacetSearchParams; +import org.apache.lucene.facet.taxonomy.TaxonomyReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.util.LuceneTestCase.Slow; -import org.apache.lucene.facet.params.FacetSearchParams; -import org.apache.lucene.facet.sampling.Sampler; -import org.apache.lucene.facet.sampling.SamplingWrapper; -import org.apache.lucene.facet.search.StandardFacetsAccumulator; -import org.apache.lucene.facet.taxonomy.TaxonomyReader; - /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. 
See the NOTICE file distributed with @@ -30,9 +27,9 @@ public class SamplingWrapperTest extends BaseSampleTestTopK { @Override - protected StandardFacetsAccumulator getSamplingAccumulator(Sampler sampler, TaxonomyReader taxoReader, + protected OldFacetsAccumulator getSamplingAccumulator(Sampler sampler, TaxonomyReader taxoReader, IndexReader indexReader, FacetSearchParams searchParams) { - return new SamplingWrapper(new StandardFacetsAccumulator(searchParams, indexReader, taxoReader), sampler); + return new SamplingWrapper(new OldFacetsAccumulator(searchParams, indexReader, taxoReader), sampler); } } Index: lucene/facet/src/test/org/apache/lucene/facet/search/AdaptiveAccumulatorTest.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/search/AdaptiveAccumulatorTest.java (revision 1508077) +++ lucene/facet/src/test/org/apache/lucene/facet/search/AdaptiveAccumulatorTest.java (working copy) @@ -1,39 +0,0 @@ -package org.apache.lucene.facet.search; - -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.util.LuceneTestCase.Slow; - -import org.apache.lucene.facet.params.FacetSearchParams; -import org.apache.lucene.facet.sampling.BaseSampleTestTopK; -import org.apache.lucene.facet.sampling.Sampler; -import org.apache.lucene.facet.taxonomy.TaxonomyReader; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -@Slow -public class AdaptiveAccumulatorTest extends BaseSampleTestTopK { - - @Override - protected StandardFacetsAccumulator getSamplingAccumulator(Sampler sampler, TaxonomyReader taxoReader, - IndexReader indexReader, FacetSearchParams searchParams) { - AdaptiveFacetsAccumulator res = new AdaptiveFacetsAccumulator(searchParams, indexReader, taxoReader); - res.setSampler(sampler); - return res; - } - -} Index: lucene/facet/src/test/org/apache/lucene/facet/search/TestFacetsCollector.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/search/TestFacetsCollector.java (revision 1508077) +++ lucene/facet/src/test/org/apache/lucene/facet/search/TestFacetsCollector.java (working copy) @@ -11,6 +11,8 @@ import org.apache.lucene.document.StringField; import org.apache.lucene.facet.FacetTestCase; import org.apache.lucene.facet.index.FacetFields; +import org.apache.lucene.facet.old.AdaptiveFacetsAccumulator; +import org.apache.lucene.facet.old.OldFacetsAccumulator; import org.apache.lucene.facet.params.CategoryListParams; import org.apache.lucene.facet.params.FacetIndexingParams; import org.apache.lucene.facet.params.FacetSearchParams; @@ -218,7 +220,7 @@ FacetSearchParams fsp = new FacetSearchParams(new CountFacetRequest(CategoryPath.EMPTY, 10)); - final TaxonomyFacetsAccumulator fa = random().nextBoolean() ? new TaxonomyFacetsAccumulator(fsp, r, taxo) : new StandardFacetsAccumulator(fsp, r, taxo); + final TaxonomyFacetsAccumulator fa = random().nextBoolean() ? 
new TaxonomyFacetsAccumulator(fsp, r, taxo) : new OldFacetsAccumulator(fsp, r, taxo); FacetsCollector fc = FacetsCollector.create(fa); newSearcher(r).search(new MatchAllDocsQuery(), fc); @@ -252,7 +254,7 @@ FacetSearchParams fsp = new FacetSearchParams( new CountFacetRequest(new CategoryPath("a"), 10), new CountFacetRequest(new CategoryPath("b"), 10)); - final TaxonomyFacetsAccumulator fa = random().nextBoolean() ? new TaxonomyFacetsAccumulator(fsp, r, taxo) : new StandardFacetsAccumulator(fsp, r, taxo); + final TaxonomyFacetsAccumulator fa = random().nextBoolean() ? new TaxonomyFacetsAccumulator(fsp, r, taxo) : new OldFacetsAccumulator(fsp, r, taxo); final FacetsCollector fc = FacetsCollector.create(fa); newSearcher(r).search(new MatchAllDocsQuery(), fc); @@ -284,7 +286,7 @@ FacetSearchParams fsp = new FacetSearchParams( new CountFacetRequest(new CategoryPath("a"), 10), new CountFacetRequest(new CategoryPath("b"), 10)); - final TaxonomyFacetsAccumulator fa = random().nextBoolean() ? new TaxonomyFacetsAccumulator(fsp, r, taxo) : new StandardFacetsAccumulator(fsp, r, taxo); + final TaxonomyFacetsAccumulator fa = random().nextBoolean() ? new TaxonomyFacetsAccumulator(fsp, r, taxo) : new OldFacetsAccumulator(fsp, r, taxo); final FacetsCollector fc = FacetsCollector.create(fa); // this should populate the cached results, but doing search should clear the cache fc.getFacetResults(); @@ -325,7 +327,7 @@ // assert IntFacetResultHandler FacetSearchParams fsp = new FacetSearchParams(new CountFacetRequest(new CategoryPath("a"), 10)); - TaxonomyFacetsAccumulator fa = random().nextBoolean() ? new TaxonomyFacetsAccumulator(fsp, r, taxo) : new StandardFacetsAccumulator(fsp, r, taxo); + TaxonomyFacetsAccumulator fa = random().nextBoolean() ? 
new TaxonomyFacetsAccumulator(fsp, r, taxo) : new OldFacetsAccumulator(fsp, r, taxo); FacetsCollector fc = FacetsCollector.create(fa); newSearcher(r).search(new MatchAllDocsQuery(), fc); assertTrue("invalid ordinal for child node: 0", 0 != fc.getFacetResults().get(0).getFacetResultNode().subResults.get(0).ordinal); @@ -340,7 +342,7 @@ } }; } else { - fa = new StandardFacetsAccumulator(fsp, r, taxo); + fa = new OldFacetsAccumulator(fsp, r, taxo); } fc = FacetsCollector.create(fa); newSearcher(r).search(new MatchAllDocsQuery(), fc); @@ -374,7 +376,7 @@ CountFacetRequest cfr = new CountFacetRequest(new CategoryPath("a"), 2); cfr.setResultMode(random().nextBoolean() ? ResultMode.GLOBAL_FLAT : ResultMode.PER_NODE_IN_TREE); FacetSearchParams fsp = new FacetSearchParams(cfr); - final TaxonomyFacetsAccumulator fa = random().nextBoolean() ? new TaxonomyFacetsAccumulator(fsp, r, taxo) : new StandardFacetsAccumulator(fsp, r, taxo); + final TaxonomyFacetsAccumulator fa = random().nextBoolean() ? new TaxonomyFacetsAccumulator(fsp, r, taxo) : new OldFacetsAccumulator(fsp, r, taxo); FacetsCollector fc = FacetsCollector.create(fa); newSearcher(r).search(new MatchAllDocsQuery(), fc); @@ -415,10 +417,10 @@ TaxonomyFacetsAccumulator[] accumulators = new TaxonomyFacetsAccumulator[] { new TaxonomyFacetsAccumulator(fsp, indexReader, taxoReader), - new StandardFacetsAccumulator(fsp, indexReader, taxoReader), + new OldFacetsAccumulator(fsp, indexReader, taxoReader), new SamplingAccumulator(sampler, fsp, indexReader, taxoReader), new AdaptiveFacetsAccumulator(fsp, indexReader, taxoReader), - new SamplingWrapper(new StandardFacetsAccumulator(fsp, indexReader, taxoReader), sampler) + new SamplingWrapper(new OldFacetsAccumulator(fsp, indexReader, taxoReader), sampler) }; for (TaxonomyFacetsAccumulator fa : accumulators) { Index: lucene/facet/src/test/org/apache/lucene/facet/search/TestTopKInEachNodeResultHandler.java =================================================================== --- 
lucene/facet/src/test/org/apache/lucene/facet/search/TestTopKInEachNodeResultHandler.java (revision 1508077) +++ lucene/facet/src/test/org/apache/lucene/facet/search/TestTopKInEachNodeResultHandler.java (working copy) @@ -11,6 +11,7 @@ import org.apache.lucene.document.TextField; import org.apache.lucene.facet.FacetTestCase; import org.apache.lucene.facet.index.FacetFields; +import org.apache.lucene.facet.old.OldFacetsAccumulator; import org.apache.lucene.facet.params.FacetIndexingParams; import org.apache.lucene.facet.params.FacetSearchParams; import org.apache.lucene.facet.search.FacetRequest.ResultMode; @@ -150,8 +151,8 @@ FacetSearchParams facetSearchParams = new FacetSearchParams(iParams, facetRequests); FacetArrays facetArrays = new FacetArrays(PartitionsUtils.partitionSize(facetSearchParams.indexingParams, tr)); - StandardFacetsAccumulator sfa = new StandardFacetsAccumulator(facetSearchParams, is.getIndexReader(), tr, facetArrays); - sfa.setComplementThreshold(StandardFacetsAccumulator.DISABLE_COMPLEMENT); + OldFacetsAccumulator sfa = new OldFacetsAccumulator(facetSearchParams, is.getIndexReader(), tr, facetArrays); + sfa.setComplementThreshold(OldFacetsAccumulator.DISABLE_COMPLEMENT); FacetsCollector fc = FacetsCollector.create(sfa); is.search(q, fc); Index: lucene/facet/src/test/org/apache/lucene/facet/search/TestTopKResultsHandlerRandom.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/search/TestTopKResultsHandlerRandom.java (revision 1508077) +++ lucene/facet/src/test/org/apache/lucene/facet/search/TestTopKResultsHandlerRandom.java (working copy) @@ -4,6 +4,7 @@ import java.util.HashMap; import java.util.List; +import org.apache.lucene.facet.old.OldFacetsAccumulator; import org.apache.lucene.facet.params.FacetIndexingParams; import org.apache.lucene.facet.params.FacetSearchParams; import org.apache.lucene.search.MatchAllDocsQuery; @@ -33,8 +34,8 @@ throws IOException { Query q = 
new MatchAllDocsQuery(); FacetSearchParams facetSearchParams = searchParamsWithRequests(numResults, fip); - StandardFacetsAccumulator sfa = new StandardFacetsAccumulator(facetSearchParams, indexReader, taxoReader); - sfa.setComplementThreshold(doComplement ? StandardFacetsAccumulator.FORCE_COMPLEMENT : StandardFacetsAccumulator.DISABLE_COMPLEMENT); + OldFacetsAccumulator sfa = new OldFacetsAccumulator(facetSearchParams, indexReader, taxoReader); + sfa.setComplementThreshold(doComplement ? OldFacetsAccumulator.FORCE_COMPLEMENT : OldFacetsAccumulator.DISABLE_COMPLEMENT); FacetsCollector fc = FacetsCollector.create(sfa); searcher.search(q, fc); List facetResults = fc.getFacetResults(); Index: lucene/facet/src/test/org/apache/lucene/facet/util/TestScoredDocIDsUtils.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/util/TestScoredDocIDsUtils.java (revision 1508077) +++ lucene/facet/src/test/org/apache/lucene/facet/util/TestScoredDocIDsUtils.java (working copy) @@ -1,156 +0,0 @@ -package org.apache.lucene.facet.util; - -import java.io.IOException; -import java.util.Random; - -import org.apache.lucene.analysis.MockAnalyzer; -import org.apache.lucene.analysis.MockTokenizer; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.document.FieldType; -import org.apache.lucene.document.StringField; -import org.apache.lucene.facet.FacetTestCase; -import org.apache.lucene.facet.search.ScoredDocIDs; -import org.apache.lucene.facet.search.ScoredDocIDsIterator; -import org.apache.lucene.index.DirectoryReader; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.RandomIndexWriter; -import org.apache.lucene.index.Term; -import org.apache.lucene.search.DocIdSet; -import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.store.Directory; -import org.apache.lucene.util.FixedBitSet; -import org.junit.Test; - -/* - * 
Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -public class TestScoredDocIDsUtils extends FacetTestCase { - - @Test - public void testComplementIterator() throws Exception { - final int n = atLeast(10000); - final FixedBitSet bits = new FixedBitSet(n); - Random random = random(); - for (int i = 0; i < n; i++) { - int idx = random.nextInt(n); - bits.flip(idx, idx + 1); - } - - FixedBitSet verify = new FixedBitSet(bits); - - ScoredDocIDs scoredDocIDs = ScoredDocIdsUtils.createScoredDocIds(bits, n); - - Directory dir = newDirectory(); - IndexReader reader = createReaderWithNDocs(random, n, dir); - try { - assertEquals(n - verify.cardinality(), ScoredDocIdsUtils.getComplementSet(scoredDocIDs, reader).size()); - } finally { - reader.close(); - dir.close(); - } - } - - @Test - public void testAllDocs() throws Exception { - int maxDoc = 3; - Directory dir = newDirectory(); - IndexReader reader = createReaderWithNDocs(random(), maxDoc, dir); - try { - ScoredDocIDs all = ScoredDocIdsUtils.createAllDocsScoredDocIDs(reader); - assertEquals("invalid size", maxDoc, all.size()); - ScoredDocIDsIterator iter = all.iterator(); - int doc = 0; - while (iter.next()) { - assertEquals("invalid doc ID: " + iter.getDocID(), doc++, 
iter.getDocID()); - assertEquals("invalid score: " + iter.getScore(), ScoredDocIDsIterator.DEFAULT_SCORE, iter.getScore(), 0.0f); - } - assertEquals("invalid maxDoc: " + doc, maxDoc, doc); - - DocIdSet docIDs = all.getDocIDs(); - assertTrue("should be cacheable", docIDs.isCacheable()); - DocIdSetIterator docIDsIter = docIDs.iterator(); - assertEquals("nextDoc() hasn't been called yet", -1, docIDsIter.docID()); - assertEquals(0, docIDsIter.nextDoc()); - assertEquals(1, docIDsIter.advance(1)); - // if advance is smaller than current doc, advance to cur+1. - assertEquals(2, docIDsIter.advance(0)); - } finally { - reader.close(); - dir.close(); - } - } - - /** - * Creates an index with n documents, this method is meant for testing purposes ONLY - */ - static IndexReader createReaderWithNDocs(Random random, int nDocs, Directory directory) throws IOException { - return createReaderWithNDocs(random, nDocs, new DocumentFactory(nDocs), directory); - } - - private static class DocumentFactory { - protected final static String field = "content"; - protected final static String delTxt = "delete"; - protected final static String alphaTxt = "alpha"; - - private final static Field deletionMark = new StringField(field, delTxt, Field.Store.NO); - private final static Field alphaContent = new StringField(field, alphaTxt, Field.Store.NO); - - public DocumentFactory(int totalNumDocs) { - } - - public boolean markedDeleted(int docNum) { - return false; - } - - public Document getDoc(int docNum) { - Document doc = new Document(); - if (markedDeleted(docNum)) { - doc.add(deletionMark); - // Add a special field for docs that are marked for deletion. Later we - // assert that those docs are not returned by all-scored-doc-IDs. 
- FieldType ft = new FieldType(); - ft.setStored(true); - doc.add(new Field("del", Integer.toString(docNum), ft)); - } - - if (haveAlpha(docNum)) { - doc.add(alphaContent); - } - return doc; - } - - public boolean haveAlpha(int docNum) { - return false; - } - } - - static IndexReader createReaderWithNDocs(Random random, int nDocs, DocumentFactory docFactory, Directory dir) throws IOException { - RandomIndexWriter writer = new RandomIndexWriter(random, dir, - newIndexWriterConfig(random, TEST_VERSION_CURRENT, - new MockAnalyzer(random, MockTokenizer.KEYWORD, false))); - for (int docNum = 0; docNum < nDocs; docNum++) { - writer.addDocument(docFactory.getDoc(docNum)); - } - // Delete documents marked for deletion - writer.deleteDocuments(new Term(DocumentFactory.field, DocumentFactory.delTxt)); - writer.close(); - - // Open a fresh read-only reader with the deletions in place - return DirectoryReader.open(dir); - } -}