Index: lucene/CHANGES.txt =================================================================== --- lucene/CHANGES.txt (revision 1433611) +++ lucene/CHANGES.txt (working copy) @@ -32,8 +32,12 @@ ======================= Lucene 4.2.0 ======================= -(No changes yet) +Changes in backwards compatibility policy +* LUCENE-4602: FacetFields now stores facet ordinals in a DocValues field, + rather than a payload. This forces rebuilding existing indexes (a migration + tool will be provided too). Since DocValues support in-memory caching, + CategoryListCache was removed too. (Shai Erera, Michael McCandless) ======================= Lucene 4.1.0 ======================= Index: lucene/facet/src/examples/org/apache/lucene/facet/example/merge/TaxonomyMergeUtils.java =================================================================== --- lucene/facet/src/examples/org/apache/lucene/facet/example/merge/TaxonomyMergeUtils.java (revision 1433611) +++ lucene/facet/src/examples/org/apache/lucene/facet/example/merge/TaxonomyMergeUtils.java (working copy) @@ -90,8 +90,9 @@ DirectoryReader reader = DirectoryReader.open(srcIndexDir, -1); List leaves = reader.leaves(); - AtomicReader wrappedLeaves[] = new AtomicReader[leaves.size()]; - for (int i = 0; i < leaves.size(); i++) { + int numReaders = leaves.size(); + AtomicReader wrappedLeaves[] = new AtomicReader[numReaders]; + for (int i = 0; i < numReaders; i++) { wrappedLeaves[i] = new OrdinalMappingAtomicReader(leaves.get(i).reader(), ordinalMap, params); } try { Index: lucene/facet/src/examples/org/apache/lucene/facet/example/multiCL/MultiCLIndexer.java =================================================================== --- lucene/facet/src/examples/org/apache/lucene/facet/example/multiCL/MultiCLIndexer.java (revision 1433611) +++ lucene/facet/src/examples/org/apache/lucene/facet/example/multiCL/MultiCLIndexer.java (working copy) @@ -20,7 +20,6 @@ import org.apache.lucene.index.IndexWriter; import 
org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexWriterConfig.OpenMode; -import org.apache.lucene.index.Term; import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMDirectory; @@ -80,12 +79,12 @@ // Initialize PerDimensionIndexingParams static { Map paramsMap = new HashMap(); - paramsMap.put(new CategoryPath("0"), new CategoryListParams(new Term("$Digits", "Zero"))); - paramsMap.put(new CategoryPath("1"), new CategoryListParams(new Term("$Digits", "One"))); - paramsMap.put(new CategoryPath("2"), new CategoryListParams(new Term("$Digits", "Two"))); - paramsMap.put(new CategoryPath("3"), new CategoryListParams(new Term("$Digits", "Three"))); - paramsMap.put(new CategoryPath("4"), new CategoryListParams(new Term("$Digits", "Four"))); - paramsMap.put(new CategoryPath("5"), new CategoryListParams(new Term("$Digits", "Five"))); + paramsMap.put(new CategoryPath("0"), new CategoryListParams("$Digits$Zero")); + paramsMap.put(new CategoryPath("1"), new CategoryListParams("$Digits$One")); + paramsMap.put(new CategoryPath("2"), new CategoryListParams("$Digits$Two")); + paramsMap.put(new CategoryPath("3"), new CategoryListParams("$Digits$Three")); + paramsMap.put(new CategoryPath("4"), new CategoryListParams("$Digits$Four")); + paramsMap.put(new CategoryPath("5"), new CategoryListParams("$Digits$Five")); MULTI_IPARAMS = new PerDimensionIndexingParams(paramsMap); } Index: lucene/facet/src/java/org/apache/lucene/facet/associations/AssociationsFacetFields.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/associations/AssociationsFacetFields.java (revision 1433611) +++ lucene/facet/src/java/org/apache/lucene/facet/associations/AssociationsFacetFields.java (working copy) @@ -114,7 +114,7 @@ } @Override - protected FieldType fieldType() { + protected FieldType drillDownFieldType() { return DRILL_DOWN_TYPE; } Index: 
lucene/facet/src/java/org/apache/lucene/facet/associations/AssociationsIterator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/associations/AssociationsIterator.java (revision 0) +++ lucene/facet/src/java/org/apache/lucene/facet/associations/AssociationsIterator.java (working copy) @@ -0,0 +1,98 @@ +package org.apache.lucene.facet.associations; + +import java.io.IOException; + +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.DocValues.Source; +import org.apache.lucene.store.ByteArrayDataInput; +import org.apache.lucene.util.BytesRef; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * An iterator over a document's category associations. + * + * @lucene.experimental + */ +public abstract class AssociationsIterator { + + private final T association; + private final String dvField; + private final boolean useDirectSource; + private final BytesRef bytes = new BytesRef(32); + + private DocValues.Source current; + + /** + * Construct a new associations iterator. The given + * {@link CategoryAssociation} is used to deserialize the association values. 
+ * It is assumed that all association values can be deserialized with the + * given {@link CategoryAssociation}. + * + *

+ * NOTE: if {@code useDirectSource} is {@code false}, then a + * {@link DocValues#getSource()} is used, which is an in-memory {@link Source}. + */ + public AssociationsIterator(String field, T association, boolean useDirectSource) throws IOException { + this.association = association; + this.dvField = field + association.getCategoryListID(); + this.useDirectSource = useDirectSource; + } + + /** + * Sets the {@link AtomicReaderContext} for which {@link #setNextDoc(int)} + * calls will be made. Returns true iff this reader has associations for any + * of the documents belonging to the association given to the constructor. + */ + public final boolean setNextReader(AtomicReaderContext context) throws IOException { + DocValues dv = context.reader().docValues(dvField); + if (dv == null) { + current = null; + return false; + } + + current = useDirectSource ? dv.getDirectSource() : dv.getSource(); + return true; + } + + /** + * Skip to the requested document. Returns true iff the document has category + * association values and they were read successfully. Associations are + * handled through {@link #handleAssociation(int, CategoryAssociation)} by + * extending classes. + */ + protected final boolean setNextDoc(int docID) throws IOException { + current.getBytes(docID, bytes); + if (bytes.length == 0) { + return false; // no associations for the requested document + } + + ByteArrayDataInput in = new ByteArrayDataInput(bytes.bytes, bytes.offset, bytes.length); + while (!in.eof()) { + int ordinal = in.readInt(); + association.deserialize(in); + handleAssociation(ordinal, association); + } + return true; + } + + /** A hook for extending classes to handle the given association value for the ordinal. 
*/ + protected abstract void handleAssociation(int ordinal, T association); + +} Property changes on: lucene/facet/src/java/org/apache/lucene/facet/associations/AssociationsIterator.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/facet/src/java/org/apache/lucene/facet/associations/AssociationsPayloadIterator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/associations/AssociationsPayloadIterator.java (revision 1433611) +++ lucene/facet/src/java/org/apache/lucene/facet/associations/AssociationsPayloadIterator.java (working copy) @@ -1,92 +0,0 @@ -package org.apache.lucene.facet.associations; - -import java.io.IOException; - -import org.apache.lucene.facet.search.PayloadIterator; -import org.apache.lucene.index.AtomicReaderContext; -import org.apache.lucene.index.Term; -import org.apache.lucene.store.ByteArrayDataInput; -import org.apache.lucene.util.BytesRef; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * An iterator over a document's category associations. 
- * - * @lucene.experimental - */ -public abstract class AssociationsPayloadIterator { - - private final PayloadIterator pi; - private final T association; - - /** - * Marking whether there are associations (at all) in the given index - */ - private boolean hasAssociations = false; - - /** - * Construct a new associations iterator. The given - * {@link CategoryAssociation} is used to deserialize the association values. - * It is assumed that all association values can be deserialized with the - * given {@link CategoryAssociation}. - */ - public AssociationsPayloadIterator(String field, T association) throws IOException { - pi = new PayloadIterator(new Term(field, association.getCategoryListID())); - this.association = association; - } - - /** - * Sets the {@link AtomicReaderContext} for which {@link #setNextDoc(int)} - * calls will be made. Returns true iff this reader has associations for any - * of the documents belonging to the association given to the constructor. - */ - public final boolean setNextReader(AtomicReaderContext context) throws IOException { - hasAssociations = pi.setNextReader(context); - return hasAssociations; - } - - /** - * Skip to the requested document. Returns true iff the document has category - * association values and they were read successfully. Associations are - * handled through {@link #handleAssociation(int, CategoryAssociation)} by - * extending classes. 
- */ - protected final boolean setNextDoc(int docID) throws IOException { - if (!hasAssociations) { // there are no associations at all - return false; - } - - BytesRef bytes = pi.getPayload(docID); - if (bytes == null) { // no associations for the requested document - return false; - } - - ByteArrayDataInput in = new ByteArrayDataInput(bytes.bytes, bytes.offset, bytes.length); - while (!in.eof()) { - int ordinal = in.readInt(); - association.deserialize(in); - handleAssociation(ordinal, association); - } - return true; - } - - /** A hook for extending classes to handle the given association value for the ordinal. */ - protected abstract void handleAssociation(int ordinal, T association); - -} Index: lucene/facet/src/java/org/apache/lucene/facet/associations/FloatAssociationsIterator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/associations/FloatAssociationsIterator.java (revision 0) +++ lucene/facet/src/java/org/apache/lucene/facet/associations/FloatAssociationsIterator.java (working copy) @@ -0,0 +1,69 @@ +package org.apache.lucene.facet.associations; + +import java.io.IOException; + +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.DocValues.Source; +import org.apache.lucene.util.collections.IntToFloatMap; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * An {@link AssociationsIterator} over float association values. + * + * @lucene.experimental + */ +public class FloatAssociationsIterator extends AssociationsIterator { + + private final IntToFloatMap ordinalAssociations = new IntToFloatMap(); + + /** + * Constructs a new {@link FloatAssociationsIterator} which uses an + * in-memory {@link DocValues#getSource() DocValues source}. + */ + public FloatAssociationsIterator(String field, CategoryFloatAssociation association) throws IOException { + this(field, association, false); + } + + /** + * Constructs a new {@link FloatAssociationsIterator} which uses a + * {@link DocValues} {@link Source} per {@code useDirectSource}. + */ + public FloatAssociationsIterator(String field, CategoryFloatAssociation association, boolean useDirectSource) + throws IOException { + super(field, association, useDirectSource); + } + + @Override + protected void handleAssociation(int ordinal, CategoryFloatAssociation association) { + ordinalAssociations.put(ordinal, association.getValue()); + } + + /** + * Returns the float association values of the categories that are associated + * with the given document, or {@code null} if the document has no + * associations. + *

+ * NOTE: you are not expected to modify the returned map. + */ + public IntToFloatMap getAssociations(int docID) throws IOException { + ordinalAssociations.clear(); + return setNextDoc(docID) ? ordinalAssociations : null; + } + +} Property changes on: lucene/facet/src/java/org/apache/lucene/facet/associations/FloatAssociationsIterator.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/facet/src/java/org/apache/lucene/facet/associations/FloatAssociationsPayloadIterator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/associations/FloatAssociationsPayloadIterator.java (revision 1433611) +++ lucene/facet/src/java/org/apache/lucene/facet/associations/FloatAssociationsPayloadIterator.java (working copy) @@ -1,54 +0,0 @@ -package org.apache.lucene.facet.associations; - -import java.io.IOException; - -import org.apache.lucene.util.collections.IntToFloatMap; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * An {@link AssociationsPayloadIterator} over integer association values. 
- * - * @lucene.experimental - */ -public class FloatAssociationsPayloadIterator extends AssociationsPayloadIterator { - - private final IntToFloatMap ordinalAssociations = new IntToFloatMap(); - - public FloatAssociationsPayloadIterator(String field, CategoryFloatAssociation association) throws IOException { - super(field, association); - } - - @Override - protected void handleAssociation(int ordinal, CategoryFloatAssociation association) { - ordinalAssociations.put(ordinal, association.getValue()); - } - - /** - * Returns the float association values of the categories that are associated - * with the given document, or {@code null} if the document has no - * associations. - *

- * NOTE: you are not expected to modify the returned map. - */ - public IntToFloatMap getAssociations(int docID) throws IOException { - ordinalAssociations.clear(); - return setNextDoc(docID) ? ordinalAssociations : null; - } - -} Index: lucene/facet/src/java/org/apache/lucene/facet/associations/IntAssociationsIterator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/associations/IntAssociationsIterator.java (revision 0) +++ lucene/facet/src/java/org/apache/lucene/facet/associations/IntAssociationsIterator.java (working copy) @@ -0,0 +1,69 @@ +package org.apache.lucene.facet.associations; + +import java.io.IOException; + +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.DocValues.Source; +import org.apache.lucene.util.collections.IntToIntMap; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * An {@link AssociationsIterator} over integer association values. 
+ * + * @lucene.experimental + */ +public class IntAssociationsIterator extends AssociationsIterator { + + private final IntToIntMap ordinalAssociations = new IntToIntMap(); + + /** + * Constructs a new {@link IntAssociationsIterator} which uses an + * in-memory {@link DocValues#getSource() DocValues source}. + */ + public IntAssociationsIterator(String field, CategoryIntAssociation association) throws IOException { + this(field, association, false); + } + + /** + * Constructs a new {@link IntAssociationsIterator} which uses a + * {@link DocValues} {@link Source} per {@code useDirectSource}. + */ + public IntAssociationsIterator(String field, CategoryIntAssociation association, boolean useDirectSource) + throws IOException { + super(field, association, useDirectSource); + } + + @Override + protected void handleAssociation(int ordinal, CategoryIntAssociation association) { + ordinalAssociations.put(ordinal, association.getValue()); + } + + /** + * Returns the integer association values of the categories that are + * associated with the given document, or {@code null} if the document has no + * associations. + *

+ * NOTE: you are not expected to modify the returned map. + */ + public IntToIntMap getAssociations(int docID) throws IOException { + ordinalAssociations.clear(); + return setNextDoc(docID) ? ordinalAssociations : null; + } + +} Property changes on: lucene/facet/src/java/org/apache/lucene/facet/associations/IntAssociationsIterator.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/facet/src/java/org/apache/lucene/facet/associations/IntAssociationsPayloadIterator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/associations/IntAssociationsPayloadIterator.java (revision 1433611) +++ lucene/facet/src/java/org/apache/lucene/facet/associations/IntAssociationsPayloadIterator.java (working copy) @@ -1,54 +0,0 @@ -package org.apache.lucene.facet.associations; - -import java.io.IOException; - -import org.apache.lucene.util.collections.IntToIntMap; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * An {@link AssociationsPayloadIterator} over integer association values. 
- * - * @lucene.experimental - */ -public class IntAssociationsPayloadIterator extends AssociationsPayloadIterator { - - private final IntToIntMap ordinalAssociations = new IntToIntMap(); - - public IntAssociationsPayloadIterator(String field, CategoryIntAssociation association) throws IOException { - super(field, association); - } - - @Override - protected void handleAssociation(int ordinal, CategoryIntAssociation association) { - ordinalAssociations.put(ordinal, association.getValue()); - } - - /** - * Returns the integer association values of the categories that are - * associated with the given document, or {@code null} if the document has no - * associations. - *

- * NOTE: you are not expected to modify the returned map. - */ - public IntToIntMap getAssociations(int docID) throws IOException { - ordinalAssociations.clear(); - return setNextDoc(docID) ? ordinalAssociations : null; - } - -} Index: lucene/facet/src/java/org/apache/lucene/facet/index/CountingListBuilder.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/index/CountingListBuilder.java (revision 1433611) +++ lucene/facet/src/java/org/apache/lucene/facet/index/CountingListBuilder.java (working copy) @@ -56,10 +56,9 @@ private static final class NoPartitionsOrdinalsEncoder extends OrdinalsEncoder { private final IntEncoder encoder; - private final String name; + private final String name = ""; NoPartitionsOrdinalsEncoder(CategoryListParams categoryListParams) { - name = categoryListParams.getTerm().text(); encoder = categoryListParams.createEncoder(); } Index: lucene/facet/src/java/org/apache/lucene/facet/index/FacetFields.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/index/FacetFields.java (revision 1433611) +++ lucene/facet/src/java/org/apache/lucene/facet/index/FacetFields.java (working copy) @@ -4,17 +4,14 @@ import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; -import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Map.Entry; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.StraightBytesDocValuesField; import org.apache.lucene.document.TextField; import org.apache.lucene.facet.index.params.CategoryListParams; import 
org.apache.lucene.facet.index.params.FacetIndexingParams; @@ -51,32 +48,6 @@ */ public class FacetFields { - // a TokenStream for writing the counting list payload - private static final class CountingListStream extends TokenStream { - private final PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class); - private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); - private Iterator> categoriesData; - - CountingListStream() {} - - @Override - public boolean incrementToken() throws IOException { - if (!categoriesData.hasNext()) { - return false; - } - - Entry entry = categoriesData.next(); - termAtt.setEmpty().append(entry.getKey()); - payloadAtt.setPayload(entry.getValue()); - return true; - } - - void setCategoriesData(Map categoriesData) { - this.categoriesData = categoriesData.entrySet().iterator(); - } - - } - // The counting list is written in a payload, but we don't store it // nor need norms. private static final FieldType COUNTING_LIST_PAYLOAD_TYPE = new FieldType(); @@ -94,9 +65,7 @@ // Therefore we set its IndexOptions to DOCS_ONLY. private static final FieldType DRILL_DOWN_TYPE = new FieldType(TextField.TYPE_NOT_STORED); static { - // TODO: once we cutover to DocValues, we can set it to DOCS_ONLY for this - // FacetFields (not associations) - DRILL_DOWN_TYPE.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); + DRILL_DOWN_TYPE.setIndexOptions(IndexOptions.DOCS_ONLY); DRILL_DOWN_TYPE.freeze(); } @@ -175,10 +144,20 @@ * Returns the {@link FieldType} with which the drill-down terms should be * indexed. The default is {@link IndexOptions#DOCS_ONLY}. */ - protected FieldType fieldType() { + protected FieldType drillDownFieldType() { return DRILL_DOWN_TYPE; } + /** + * Add the counting list data to the document under the given field. Note that + * the field is determined by the {@link CategoryListParams}. 
+ */ + protected void addCountingListData(Document doc, Map categoriesData, String field) { + for (Entry entry : categoriesData.entrySet()) { + doc.add(new StraightBytesDocValuesField(field + entry.getKey(), entry.getValue())); + } + } + /** Adds the needed facet fields to the document. */ public void addFields(Document doc, Iterable categories) throws IOException { if (categories == null) { @@ -198,7 +177,7 @@ IntsRef ordinals = new IntsRef(32); // should be enough for most common applications for (Entry> e : categoryLists.entrySet()) { final CategoryListParams clp = e.getKey(); - final String field = clp.getTerm().field(); + final String field = clp.field; // build category list data ordinals.length = 0; // reset @@ -214,13 +193,11 @@ Map categoriesData = getCategoryListData(clp, ordinals, e.getValue()); // add the counting list data - CountingListStream ts = new CountingListStream(); - ts.setCategoriesData(categoriesData); - doc.add(new Field(field, ts, COUNTING_LIST_PAYLOAD_TYPE)); + addCountingListData(doc, categoriesData, field); // add the drill-down field DrillDownStream drillDownStream = getDrillDownStream(e.getValue()); - Field drillDown = new Field(field, drillDownStream, fieldType()); + Field drillDown = new Field(field, drillDownStream, drillDownFieldType()); doc.add(drillDown); } } Index: lucene/facet/src/java/org/apache/lucene/facet/index/OrdinalMappingAtomicReader.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/index/OrdinalMappingAtomicReader.java (revision 1433611) +++ lucene/facet/src/java/org/apache/lucene/facet/index/OrdinalMappingAtomicReader.java (working copy) @@ -25,13 +25,10 @@ import org.apache.lucene.facet.index.params.FacetIndexingParams; import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.OrdinalMap; import org.apache.lucene.index.AtomicReader; -import org.apache.lucene.index.DocsAndPositionsEnum; -import org.apache.lucene.index.Fields; 
+import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.DocValues.Source; +import org.apache.lucene.index.DocValues.Type; import org.apache.lucene.index.FilterAtomicReader; -import org.apache.lucene.index.Term; -import org.apache.lucene.index.Terms; -import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IntsRef; import org.apache.lucene.util.encoding.IntDecoder; @@ -41,8 +38,8 @@ * A {@link FilterAtomicReader} for updating facets ordinal references, * based on an ordinal map. You should use this code in conjunction with merging * taxonomies - after you merge taxonomies, you receive an {@link OrdinalMap} - * which maps the 'old' payloads to the 'new' ones. You can use that map to - * re-map the payloads which contain the facets information (ordinals) either + * which maps the 'old' ordinals to the 'new' ones. You can use that map to + * re-map the doc values which contain the facets information (ordinals) either * before or while merging the indexes. *

* For re-mapping the ordinals during index merge, do the following: @@ -70,10 +67,9 @@ public class OrdinalMappingAtomicReader extends FilterAtomicReader { private final int[] ordinalMap; - // a little obtuse: but we dont need to create Term objects this way - private final Map> termMap = - new HashMap>(1); + private final Map dvFieldMap = new HashMap(); + /** * Wraps an AtomicReader, mapping ordinals according to the ordinalMap. * Calls {@link #OrdinalMappingAtomicReader(AtomicReader, int[], FacetIndexingParams) @@ -91,125 +87,85 @@ super(in); this.ordinalMap = ordinalMap; for (CategoryListParams params: indexingParams.getAllCategoryListParams()) { - Term term = params.getTerm(); - Map fieldMap = termMap.get(term.field()); - if (fieldMap == null) { - fieldMap = new HashMap(1); - termMap.put(term.field(), fieldMap); - } - fieldMap.put(term.bytes(), params); + dvFieldMap.put(params.field, params); } } @Override - public Fields getTermVectors(int docID) throws IOException { - Fields fields = super.getTermVectors(docID); - if (fields == null) { - return null; - } else { - return new OrdinalMappingFields(fields); + public DocValues docValues(String field) throws IOException { + DocValues inner = super.docValues(field); + if (inner == null) { + return inner; } - } - - @Override - public Fields fields() throws IOException { - Fields fields = super.fields(); - if (fields == null) { - return null; + + CategoryListParams clp = dvFieldMap.get(field); + if (clp == null) { + return inner; } else { - return new OrdinalMappingFields(fields); + return new OrdinalMappingDocValues(inner, clp); } } - private class OrdinalMappingFields extends FilterFields { + private class OrdinalMappingDocValues extends DocValues { - public OrdinalMappingFields(Fields in) { - super(in); + private final CategoryListParams clp; + private final DocValues delegate; + + public OrdinalMappingDocValues(DocValues delegate, CategoryListParams clp) { + this.delegate = delegate; + this.clp = clp; } @Override 
- public Terms terms(String field) throws IOException { - Terms terms = super.terms(field); - if (terms == null) { - return terms; - } - Map termsMap = termMap.get(field); - if (termsMap == null) { - return terms; - } else { - return new OrdinalMappingTerms(terms, termsMap); - } + protected Source loadSource() throws IOException { + return new OrdinalMappingSource(getType(), clp, delegate.getSource()); } - } - - private class OrdinalMappingTerms extends FilterTerms { - private final Map termsMap; - - public OrdinalMappingTerms(Terms in, Map termsMap) { - super(in); - this.termsMap = termsMap; - } @Override - public TermsEnum iterator(TermsEnum reuse) throws IOException { - // TODO: should we reuse the inner termsenum? - return new OrdinalMappingTermsEnum(super.iterator(reuse), termsMap); + protected Source loadDirectSource() throws IOException { + return new OrdinalMappingSource(getType(), clp, delegate.getDirectSource()); } - } - - private class OrdinalMappingTermsEnum extends FilterTermsEnum { - private final Map termsMap; - - public OrdinalMappingTermsEnum(TermsEnum in, Map termsMap) { - super(in); - this.termsMap = termsMap; - } @Override - public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException { - // TODO: we could reuse our D&P enum if we need - DocsAndPositionsEnum inner = super.docsAndPositions(liveDocs, reuse, flags); - if (inner == null) { - return inner; - } - - CategoryListParams params = termsMap.get(term()); - if (params == null) { - return inner; - } - - return new OrdinalMappingDocsAndPositionsEnum(inner, params); + public Type getType() { + return Type.BYTES_VAR_STRAIGHT; } + } - private class OrdinalMappingDocsAndPositionsEnum extends FilterDocsAndPositionsEnum { + private class OrdinalMappingSource extends Source { + private final IntEncoder encoder; private final IntDecoder decoder; private final IntsRef ordinals = new IntsRef(32); - private final BytesRef payloadOut = new 
BytesRef(); - - public OrdinalMappingDocsAndPositionsEnum(DocsAndPositionsEnum in, CategoryListParams params) { - super(in); - encoder = params.createEncoder(); + private final Source delegate; + + protected OrdinalMappingSource(Type type, CategoryListParams clp, Source delegate) { + super(type); + this.delegate = delegate; + encoder = clp.createEncoder(); decoder = encoder.createMatchingDecoder(); } - + + @SuppressWarnings("synthetic-access") @Override - public BytesRef getPayload() throws IOException { - BytesRef payload = super.getPayload(); - if (payload == null) { - return payload; + public BytesRef getBytes(int docID, BytesRef ref) { + ref = delegate.getBytes(docID, ref); + if (ref == null || ref.length == 0) { + return ref; } else { - decoder.decode(payload, ordinals); + decoder.decode(ref, ordinals); // map the ordinals for (int i = 0; i < ordinals.length; i++) { ordinals.ints[i] = ordinalMap[ordinals.ints[i]]; } - encoder.encode(ordinals, payloadOut); - return payloadOut; + encoder.encode(ordinals, ref); + return ref; } } + } + } Index: lucene/facet/src/java/org/apache/lucene/facet/index/params/CategoryListParams.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/index/params/CategoryListParams.java (revision 1433611) +++ lucene/facet/src/java/org/apache/lucene/facet/index/params/CategoryListParams.java (working copy) @@ -4,9 +4,8 @@ import java.io.Serializable; import org.apache.lucene.facet.search.CategoryListIterator; -import org.apache.lucene.facet.search.PayloadCategoryListIteraor; +import org.apache.lucene.facet.search.DocValuesCategoryListIterator; import org.apache.lucene.facet.util.PartitionsUtils; -import org.apache.lucene.index.Term; import org.apache.lucene.util.encoding.DGapIntEncoder; import org.apache.lucene.util.encoding.IntDecoder; import org.apache.lucene.util.encoding.IntEncoder; @@ -38,39 +37,26 @@ */ public class CategoryListParams implements Serializable { - /** 
The default term used to store the facets information. */ - public static final Term DEFAULT_TERM = new Term("$facets", "$fulltree$"); + /** The default field used to store the facets information. */ + public static final String DEFAULT_FIELD = "$facets"; - private final Term term; + public final String field; private final int hashCode; - /** - * Constructs a default category list parameters object, using - * {@link #DEFAULT_TERM}. - */ + /** Constructs a default category list parameters object, using {@link #DEFAULT_FIELD}. */ public CategoryListParams() { - this(DEFAULT_TERM); + this(DEFAULT_FIELD); } - /** - * Constructs a category list parameters object, using the given {@link Term}. - * @param term who's payload hold the category-list. - */ - public CategoryListParams(Term term) { - this.term = term; + /** Constructs a category list parameters object, using the given field. */ + public CategoryListParams(String field) { + this.field = field; // Pre-compute the hashCode because these objects are immutable. Saves // some time on the comparisons later. - this.hashCode = term.hashCode(); + this.hashCode = field.hashCode(); } - /** - * A {@link Term} who's payload holds the category-list. - */ - public final Term getTerm() { - return term; - } - /** * Allows to override how categories are encoded and decoded. A matching * {@link IntDecoder} is provided by the {@link IntEncoder}. @@ -110,7 +96,7 @@ // The above hashcodes might equal each other in the case of a collision, // so at this point only directly term equality testing will settle // the equality test. - return this.term.equals(other.term); + return field.equals(other.field); } @Override @@ -121,8 +107,8 @@ /** Create the {@link CategoryListIterator} for the specified partition. 
*/ public CategoryListIterator createCategoryListIterator(int partition) throws IOException { String categoryListTermStr = PartitionsUtils.partitionName(this, partition); - Term payloadTerm = new Term(term.field(), categoryListTermStr); - return new PayloadCategoryListIteraor(payloadTerm, createEncoder().createMatchingDecoder()); + String docValuesField = field + categoryListTermStr; + return new DocValuesCategoryListIterator(docValuesField, createEncoder().createMatchingDecoder()); } } \ No newline at end of file Index: lucene/facet/src/java/org/apache/lucene/facet/search/DocValuesCategoryListIterator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/DocValuesCategoryListIterator.java (revision 0) +++ lucene/facet/src/java/org/apache/lucene/facet/search/DocValuesCategoryListIterator.java (working copy) @@ -0,0 +1,105 @@ +package org.apache.lucene.facet.search; + +import java.io.IOException; + +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.DocValues.Source; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.IntsRef; +import org.apache.lucene.util.encoding.IntDecoder; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** A {@link CategoryListIterator} which reads the ordinals from a {@link DocValues}. */ +public class DocValuesCategoryListIterator implements CategoryListIterator { + + private final IntDecoder decoder; + private final String field; + private final int hashCode; + private final boolean useDirectSource; + private final BytesRef bytes = new BytesRef(32); + + private DocValues.Source current; + + /** + * Constructs a new {@link DocValuesCategoryListIterator} which uses an + * in-memory {@link Source}. + */ + public DocValuesCategoryListIterator(String field, IntDecoder decoder) { + this(field, decoder, false); + } + + /** + * Constructs a new {@link DocValuesCategoryListIterator} which uses either a + * {@link DocValues#getDirectSource() direct source} or + * {@link DocValues#getSource() in-memory} one. + */ + public DocValuesCategoryListIterator(String field, IntDecoder decoder, boolean useDirectSource) { + this.field = field; + this.decoder = decoder; + this.hashCode = field.hashCode(); + this.useDirectSource = useDirectSource; + } + + @Override + public int hashCode() { + return hashCode; + } + + @Override + public boolean equals(Object o) { + if (!(o instanceof DocValuesCategoryListIterator)) { + return false; + } + DocValuesCategoryListIterator other = (DocValuesCategoryListIterator) o; + if (hashCode != other.hashCode) { + return false; + } + + // Hash codes are the same, check equals() to avoid cases of hash-collisions. + return field.equals(other.field); + } + + @Override + public boolean setNextReader(AtomicReaderContext context) throws IOException { + DocValues dv = context.reader().docValues(field); + if (dv == null) { + current = null; + return false; + } + + current = useDirectSource ? 
dv.getDirectSource() : dv.getSource(); + return true; + } + + @Override + public void getOrdinals(int docID, IntsRef ints) throws IOException { + current.getBytes(docID, bytes); + ints.length = 0; + if (bytes.length > 0) { + decoder.decode(bytes, ints); + } + } + + @Override + public String toString() { + return field; + } + +} Property changes on: lucene/facet/src/java/org/apache/lucene/facet/search/DocValuesCategoryListIterator.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/facet/src/java/org/apache/lucene/facet/search/DrillDown.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/DrillDown.java (revision 1433611) +++ lucene/facet/src/java/org/apache/lucene/facet/search/DrillDown.java (working copy) @@ -55,7 +55,7 @@ CategoryListParams clp = iParams.getCategoryListParams(path); char[] buffer = new char[path.fullPathLength()]; iParams.drillDownTermText(path, buffer); - return new Term(clp.getTerm().field(), String.valueOf(buffer)); + return new Term(clp.field, String.valueOf(buffer)); } /** Index: lucene/facet/src/java/org/apache/lucene/facet/search/PayloadCategoryListIteraor.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/PayloadCategoryListIteraor.java (revision 1433611) +++ lucene/facet/src/java/org/apache/lucene/facet/search/PayloadCategoryListIteraor.java (working copy) @@ -1,81 +0,0 @@ -package org.apache.lucene.facet.search; - -import java.io.IOException; - -import org.apache.lucene.index.AtomicReaderContext; -import org.apache.lucene.index.Term; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.IntsRef; -import org.apache.lucene.util.encoding.IntDecoder; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * A {@link CategoryListIterator} which reads the category ordinals from a - * payload. - * - * @lucene.experimental - */ -public class PayloadCategoryListIteraor implements CategoryListIterator { - - private final IntDecoder decoder; - private final Term term; - private final PayloadIterator pi; - private final int hashCode; - - public PayloadCategoryListIteraor(Term term, IntDecoder decoder) throws IOException { - pi = new PayloadIterator(term); - this.decoder = decoder; - hashCode = term.hashCode(); - this.term = term; - } - - @Override - public boolean equals(Object other) { - if (!(other instanceof PayloadCategoryListIteraor)) { - return false; - } - PayloadCategoryListIteraor that = (PayloadCategoryListIteraor) other; - if (hashCode != that.hashCode) { - return false; - } - - // Hash codes are the same, check equals() to avoid cases of hash-collisions. 
- return term.equals(that.term); - } - - @Override - public int hashCode() { - return hashCode; - } - - @Override - public boolean setNextReader(AtomicReaderContext context) throws IOException { - return pi.setNextReader(context); - } - - @Override - public void getOrdinals(int docID, IntsRef ints) throws IOException { - ints.length = 0; - BytesRef payload = pi.getPayload(docID); - if (payload != null) { - decoder.decode(payload, ints); - } - } - -} Index: lucene/facet/src/java/org/apache/lucene/facet/search/PayloadIterator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/PayloadIterator.java (revision 1433611) +++ lucene/facet/src/java/org/apache/lucene/facet/search/PayloadIterator.java (working copy) @@ -1,114 +0,0 @@ -package org.apache.lucene.facet.search; - -import java.io.IOException; - -import org.apache.lucene.index.AtomicReaderContext; -import org.apache.lucene.index.DocsAndPositionsEnum; -import org.apache.lucene.index.Fields; -import org.apache.lucene.index.Term; -import org.apache.lucene.index.Terms; -import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.util.BytesRef; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * A utility class for iterating through a posting list of a given term and - * retrieving the payload of the first position in every document. For - * efficiency, this class does not check if documents passed to - * {@link #getPayload(int)} are deleted, since it is usually used to iterate on - * payloads of documents that matched a query. If you need to skip over deleted - * documents, you should do so before calling {@link #getPayload(int)}. - * - * @lucene.experimental - */ -public class PayloadIterator { - - private TermsEnum reuseTE; - private DocsAndPositionsEnum dpe; - private boolean hasMore; - private int curDocID; - - private final Term term; - - public PayloadIterator(Term term) throws IOException { - this.term = term; - } - - /** - * Sets the {@link AtomicReaderContext} for which {@link #getPayload(int)} - * calls will be made. Returns true iff this reader has payload for any of the - * documents belonging to the {@link Term} given to the constructor. - */ - public boolean setNextReader(AtomicReaderContext context) throws IOException { - hasMore = false; - Fields fields = context.reader().fields(); - if (fields != null) { - Terms terms = fields.terms(term.field()); - if (terms != null) { - reuseTE = terms.iterator(reuseTE); - if (reuseTE.seekExact(term.bytes(), true)) { - // this class is usually used to iterate on whatever a Query matched - // if it didn't match deleted documents, we won't receive them. 
if it - // did, we should iterate on them too, therefore we pass liveDocs=null - dpe = reuseTE.docsAndPositions(null, dpe, DocsAndPositionsEnum.FLAG_PAYLOADS); - if (dpe != null && (curDocID = dpe.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { - hasMore = true; - } - } - } - } - return hasMore; - } - - /** - * Returns the {@link BytesRef payload} of the given document, or {@code null} - * if the document does not exist, there are no more documents in the posting - * list, or the document exists but has not payload. The given document IDs - * are treated as local to the reader given to - * {@link #setNextReader(AtomicReaderContext)}. - */ - public BytesRef getPayload(int docID) throws IOException { - if (!hasMore) { - return null; - } - - if (curDocID > docID) { - // document does not exist - return null; - } - - if (curDocID < docID) { - curDocID = dpe.advance(docID); - if (curDocID != docID) { // requested document does not have a payload - if (curDocID == DocIdSetIterator.NO_MORE_DOCS) { // no more docs in this reader - hasMore = false; - } - return null; - } - } - - // we're on the document - assert dpe.freq() == 1 : "expecting freq=1 (got " + dpe.freq() + ") term=" + term + " doc=" + curDocID; - int pos = dpe.nextPosition(); - assert pos != -1 : "no positions for term=" + term + " doc=" + curDocID; - return dpe.getPayload(); - } - -} Index: lucene/facet/src/java/org/apache/lucene/facet/search/StandardFacetsAccumulator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/StandardFacetsAccumulator.java (revision 1433611) +++ lucene/facet/src/java/org/apache/lucene/facet/search/StandardFacetsAccumulator.java (working copy) @@ -10,6 +10,7 @@ import java.util.logging.Level; import java.util.logging.Logger; +import org.apache.lucene.facet.index.params.FacetIndexingParams; import org.apache.lucene.facet.search.aggregator.Aggregator; import org.apache.lucene.facet.search.params.FacetRequest; 
import org.apache.lucene.facet.search.params.FacetSearchParams; @@ -110,9 +111,8 @@ if (isUsingComplements) { try { - totalFacetCounts = TotalFacetCountsCache.getSingleton() - .getTotalCounts(indexReader, taxonomyReader, - searchParams.getFacetIndexingParams(), searchParams.getCategoryListCache()); + totalFacetCounts = TotalFacetCountsCache.getSingleton().getTotalCounts(indexReader, taxonomyReader, + searchParams.getFacetIndexingParams()); if (totalFacetCounts != null) { docids = ScoredDocIdsUtils.getComplementSet(docids, indexReader); } else { @@ -242,20 +242,29 @@ int maxDoc = -1; while (iterator.next()) { int docID = iterator.getDocID(); - while (docID >= maxDoc) { // find the segment which contains this document - if (!contexts.hasNext()) { - throw new RuntimeException("ScoredDocIDs contains documents outside this reader's segments !?"); - } - current = contexts.next(); - maxDoc = current.docBase + current.reader().maxDoc(); - if (docID < maxDoc) { // segment has docs, check if it has categories - boolean validSegment = categoryListIter.setNextReader(current); - validSegment &= aggregator.setNextReader(current); - if (!validSegment) { // if categoryList or aggregtor say it's an invalid segment, skip all docs - while (docID < maxDoc && iterator.next()) { - docID = iterator.getDocID(); + if (docID >= maxDoc) { + boolean iteratorDone = false; + do { // find the segment which contains this document + if (!contexts.hasNext()) { + throw new RuntimeException("ScoredDocIDs contains documents outside this reader's segments !?"); + } + current = contexts.next(); + maxDoc = current.docBase + current.reader().maxDoc(); + if (docID < maxDoc) { // segment has docs, check if it has categories + boolean validSegment = categoryListIter.setNextReader(current); + validSegment &= aggregator.setNextReader(current); + if (!validSegment) { // if categoryList or aggregtor say it's an invalid segment, skip all docs + while (docID < maxDoc && iterator.next()) { + docID = 
iterator.getDocID(); + } + if (docID < maxDoc) { + iteratorDone = true; + } } } + } while (docID >= maxDoc); + if (iteratorDone) { // iterator finished, terminate the loop + break; } } docID -= current.docBase; @@ -312,19 +321,17 @@ HashMap categoryLists = new HashMap(); + FacetIndexingParams indexingParams = searchParams.getFacetIndexingParams(); for (FacetRequest facetRequest : searchParams.getFacetRequests()) { - Aggregator categoryAggregator = facetRequest.createAggregator( - isUsingComplements, facetArrays, taxonomyReader); + Aggregator categoryAggregator = facetRequest.createAggregator(isUsingComplements, facetArrays, taxonomyReader); - CategoryListIterator cli = facetRequest.createCategoryListIterator(taxonomyReader, searchParams, partition); + CategoryListIterator cli = indexingParams.getCategoryListParams(facetRequest.categoryPath).createCategoryListIterator(partition); // get the aggregator Aggregator old = categoryLists.put(cli, categoryAggregator); if (old != null && !old.equals(categoryAggregator)) { - // TODO (Facet): create a more meaningful RE class, and throw it. - throw new RuntimeException( - "Overriding existing category list with different aggregator. 
THAT'S A NO NO!"); + throw new RuntimeException("Overriding existing category list with different aggregator"); } // if the aggregator is the same we're covered } Index: lucene/facet/src/java/org/apache/lucene/facet/search/TopKFacetResultsHandler.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/TopKFacetResultsHandler.java (revision 1433611) +++ lucene/facet/src/java/org/apache/lucene/facet/search/TopKFacetResultsHandler.java (working copy) @@ -55,7 +55,7 @@ public IntermediateFacetResult fetchPartitionResult(FacetArrays facetArrays, int offset) throws IOException { TopKFacetResult res = null; - int ordinal = taxonomyReader.getOrdinal(facetRequest.getCategoryPath()); + int ordinal = taxonomyReader.getOrdinal(facetRequest.categoryPath); if (ordinal != TaxonomyReader.INVALID_ORDINAL) { double value = 0; if (isSelfPartition(ordinal, facetArrays, offset)) { @@ -79,7 +79,7 @@ @Override public IntermediateFacetResult mergeResults(IntermediateFacetResult... tmpResults) throws IOException { - int ordinal = taxonomyReader.getOrdinal(facetRequest.getCategoryPath()); + int ordinal = taxonomyReader.getOrdinal(facetRequest.categoryPath); MutableFacetResultNode resNode = new MutableFacetResultNode(ordinal, 0); int totalFacets = 0; Index: lucene/facet/src/java/org/apache/lucene/facet/search/TopKInEachNodeHandler.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/TopKInEachNodeHandler.java (revision 1433611) +++ lucene/facet/src/java/org/apache/lucene/facet/search/TopKInEachNodeHandler.java (working copy) @@ -34,35 +34,37 @@ */ /** - * Generates {@link FacetResult} from the count arrays aggregated for a particular - * {@link FacetRequest}. - * The generated {@link FacetResult} is a subtree of the taxonomy tree. 
- * Its root node, {@link FacetResult#getFacetResultNode()}, - * is the facet specified by {@link FacetRequest#getCategoryPath()}, - * and the enumerated children, {@link FacetResultNode#getSubResults()}, of each node in that - * {@link FacetResult} are the top K ( = {@link FacetRequest#getNumResults()}) among its children - * in the taxonomy. - * Top in the sense {@link FacetRequest#getSortBy()}, - * which can be by the values aggregated in the count arrays, or by ordinal numbers; - * also specified is the sort order, {@link FacetRequest#getSortOrder()}, - * ascending or descending, of these values or ordinals before their top K are selected. - * The depth (number of levels excluding the root) of the - * {@link FacetResult} tree is specified by {@link FacetRequest#getDepth()}. + * Generates {@link FacetResult} from the count arrays aggregated for a + * particular {@link FacetRequest}. The generated {@link FacetResult} is a + * subtree of the taxonomy tree. Its root node, + * {@link FacetResult#getFacetResultNode()}, is the facet specified by + * {@link FacetRequest#categoryPath}, and the enumerated children, + * {@link FacetResultNode#getSubResults()}, of each node in that + * {@link FacetResult} are the top K ( = {@link FacetRequest#getNumResults()}) + * among its children in the taxonomy. Top in the sense + * {@link FacetRequest#getSortBy()}, which can be by the values aggregated in + * the count arrays, or by ordinal numbers; also specified is the sort order, + * {@link FacetRequest#getSortOrder()}, ascending or descending, of these values + * or ordinals before their top K are selected. The depth (number of levels + * excluding the root) of the {@link FacetResult} tree is specified by + * {@link FacetRequest#getDepth()}. *

- * Because the number of selected children of each node is restricted, - * and not the overall number of nodes in the {@link FacetResult}, facets not selected + * Because the number of selected children of each node is restricted, and not + * the overall number of nodes in the {@link FacetResult}, facets not selected * into {@link FacetResult} might have better values, or ordinals, (typically, * higher counts), than facets that are selected into the {@link FacetResult}. *

- * The generated {@link FacetResult} also provides with - * {@link FacetResult#getNumValidDescendants()}, which returns the total number of facets - * that are descendants of the root node, no deeper than {@link FacetRequest#getDepth()}, and - * which have valid value. The rootnode itself is not counted here. - * Valid value is determined by the {@link FacetResultsHandler}. - * {@link TopKInEachNodeHandler} defines valid as != 0. + * The generated {@link FacetResult} also provides + * {@link FacetResult#getNumValidDescendants()}, which returns the total number + * of facets that are descendants of the root node, no deeper than + * {@link FacetRequest#getDepth()}, and which have a valid value. The root node + * itself is not counted here. Valid value is determined by the + * {@link FacetResultsHandler}. {@link TopKInEachNodeHandler} defines valid as + * != 0. *

- * NOTE: this code relies on the assumption that {@link TaxonomyReader#INVALID_ORDINAL} == -1, a smaller - * value than any valid ordinal. + * NOTE: this code relies on the assumption that + * {@link TaxonomyReader#INVALID_ORDINAL} == -1, a smaller value than any valid + * ordinal. * * @lucene.experimental */ @@ -109,7 +111,7 @@ // get the root of the result tree to be returned, and the depth of that result tree // (depth means number of node levels excluding the root). - int rootNode = this.taxonomyReader.getOrdinal(this.facetRequest.getCategoryPath()); + int rootNode = this.taxonomyReader.getOrdinal(facetRequest.categoryPath); if (rootNode == TaxonomyReader.INVALID_ORDINAL) { return null; } @@ -767,7 +769,7 @@ @Override public FacetResult renderFacetResult(IntermediateFacetResult tmpResult) throws IOException { IntermediateFacetResultWithHash tmp = (IntermediateFacetResultWithHash) tmpResult; - int ordinal = this.taxonomyReader.getOrdinal(this.facetRequest.getCategoryPath()); + int ordinal = this.taxonomyReader.getOrdinal(this.facetRequest.categoryPath); if ((tmp == null) || (ordinal == TaxonomyReader.INVALID_ORDINAL)) { return null; } Index: lucene/facet/src/java/org/apache/lucene/facet/search/TotalFacetCounts.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/TotalFacetCounts.java (revision 1433611) +++ lucene/facet/src/java/org/apache/lucene/facet/search/TotalFacetCounts.java (working copy) @@ -17,8 +17,6 @@ import org.apache.lucene.facet.index.params.FacetIndexingParams; import org.apache.lucene.facet.search.aggregator.Aggregator; import org.apache.lucene.facet.search.aggregator.CountingAggregator; -import org.apache.lucene.facet.search.cache.CategoryListCache; -import org.apache.lucene.facet.search.cache.CategoryListData; import org.apache.lucene.facet.search.params.CountFacetRequest; import org.apache.lucene.facet.search.params.FacetRequest; import 
org.apache.lucene.facet.search.params.FacetSearchParams; @@ -155,9 +153,8 @@ private static final List DUMMY_REQ = Arrays.asList( new FacetRequest[] { new CountFacetRequest(CategoryPath.EMPTY, 1) }); - static TotalFacetCounts compute(final IndexReader indexReader, - final TaxonomyReader taxonomy, final FacetIndexingParams facetIndexingParams, - final CategoryListCache clCache) throws IOException { + static TotalFacetCounts compute(final IndexReader indexReader, final TaxonomyReader taxonomy, + final FacetIndexingParams facetIndexingParams) throws IOException { int partitionSize = PartitionsUtils.partitionSize(facetIndexingParams, taxonomy); final int[][] counts = new int[(int) Math.ceil(taxonomy.getSize() /(float) partitionSize)][partitionSize]; FacetSearchParams newSearchParams = new FacetSearchParams(DUMMY_REQ, facetIndexingParams); @@ -170,8 +167,7 @@ Aggregator aggregator = new CountingAggregator(counts[partition]); HashMap map = new HashMap(); for (CategoryListParams clp: facetIndexingParams.getAllCategoryListParams()) { - final CategoryListIterator cli = clIteraor(clCache, clp, partition); - map.put(cli, aggregator); + map.put(clp.createCategoryListIterator(partition), aggregator); } return map; } @@ -181,14 +177,4 @@ return new TotalFacetCounts(taxonomy, facetIndexingParams, counts, CreationType.Computed); } - static CategoryListIterator clIteraor(CategoryListCache clCache, CategoryListParams clp, int partition) - throws IOException { - if (clCache != null) { - CategoryListData cld = clCache.get(clp); - if (cld != null) { - return cld.iterator(partition); - } - } - return clp.createCategoryListIterator(partition); - } } \ No newline at end of file Index: lucene/facet/src/java/org/apache/lucene/facet/search/TotalFacetCountsCache.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/TotalFacetCountsCache.java (revision 1433611) +++ 
lucene/facet/src/java/org/apache/lucene/facet/search/TotalFacetCountsCache.java (working copy) @@ -7,12 +7,10 @@ import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentLinkedQueue; -import org.apache.lucene.index.IndexReader; - import org.apache.lucene.facet.index.params.CategoryListParams; import org.apache.lucene.facet.index.params.FacetIndexingParams; -import org.apache.lucene.facet.search.cache.CategoryListCache; import org.apache.lucene.facet.taxonomy.TaxonomyReader; +import org.apache.lucene.index.IndexReader; /* * Licensed to the Apache Software Foundation (ASF) under one or more @@ -80,16 +78,20 @@ } /** - * Get the total facet counts for a reader/taxonomy pair and facet indexing parameters. - * If not in cache, computed here and added to the cache for later use. - * @param indexReader the documents index - * @param taxonomy the taxonomy index - * @param facetIndexingParams facet indexing parameters - * @param clCache category list cache for faster computation, can be null + * Get the total facet counts for a reader/taxonomy pair and facet indexing + * parameters. If not in cache, computed here and added to the cache for later + * use. + * + * @param indexReader + * the documents index + * @param taxonomy + * the taxonomy index + * @param facetIndexingParams + * facet indexing parameters * @return the total facet counts. 
*/ public TotalFacetCounts getTotalCounts(IndexReader indexReader, TaxonomyReader taxonomy, - FacetIndexingParams facetIndexingParams, CategoryListCache clCache) throws IOException { + FacetIndexingParams facetIndexingParams) throws IOException { // create the key TFCKey key = new TFCKey(indexReader, taxonomy, facetIndexingParams); // it is important that this call is not synchronized, so that available TFC @@ -99,7 +101,7 @@ markRecentlyUsed(key); return tfc; } - return computeAndCache(key, clCache); + return computeAndCache(key); } /** @@ -149,10 +151,10 @@ * matter this method is synchronized, which is not too bad, because there is * lots of work done in the computations. */ - private synchronized TotalFacetCounts computeAndCache(TFCKey key, CategoryListCache clCache) throws IOException { + private synchronized TotalFacetCounts computeAndCache(TFCKey key) throws IOException { TotalFacetCounts tfc = cache.get(key); if (tfc == null) { - tfc = TotalFacetCounts.compute(key.indexReader, key.taxonomy, key.facetIndexingParams, clCache); + tfc = TotalFacetCounts.compute(key.indexReader, key.taxonomy, key.facetIndexingParams); lruKeys.add(key); cache.put(key,tfc); trimCache(); @@ -161,16 +163,22 @@ } /** - * Load {@link TotalFacetCounts} matching input parameters from the provided outputFile - * and add them into the cache for the provided indexReader, taxonomy, and facetIndexingParams. - * If a {@link TotalFacetCounts} for these parameters already exists in the cache, it will be - * replaced by the loaded one. 
- * @param inputFile file from which to read the data - * @param indexReader the documents index - * @param taxonomy the taxonomy index - * @param facetIndexingParams the facet indexing parameters - * @throws IOException on error - * @see #store(File, IndexReader, TaxonomyReader, FacetIndexingParams, CategoryListCache) + * Load {@link TotalFacetCounts} matching input parameters from the provided + * outputFile and add them into the cache for the provided indexReader, + * taxonomy, and facetIndexingParams. If a {@link TotalFacetCounts} for these + * parameters already exists in the cache, it will be replaced by the loaded + * one. + * + * @param inputFile + * file from which to read the data + * @param indexReader + * the documents index + * @param taxonomy + * the taxonomy index + * @param facetIndexingParams + * the facet indexing parameters + * @throws IOException + * on error */ public synchronized void load(File inputFile, IndexReader indexReader, TaxonomyReader taxonomy, FacetIndexingParams facetIndexingParams) throws IOException { @@ -185,21 +193,27 @@ } /** - * Store the {@link TotalFacetCounts} matching input parameters into the provided outputFile, - * making them available for a later call to {@link #load(File, IndexReader, TaxonomyReader, FacetIndexingParams)}. - * If these {@link TotalFacetCounts} are available in the cache, they are used. But if they are - * not in the cache, this call will first compute them (which will also add them to the cache). - * @param outputFile file to store in. - * @param indexReader the documents index - * @param taxonomy the taxonomy index - * @param facetIndexingParams the facet indexing parameters - * @param clCache category list cache for faster computation, can be null - * @throws IOException on error + * Store the {@link TotalFacetCounts} matching input parameters into the + * provided outputFile, making them available for a later call to + * {@link #load(File, IndexReader, TaxonomyReader, FacetIndexingParams)}. 
If + * these {@link TotalFacetCounts} are available in the cache, they are used. + * But if they are not in the cache, this call will first compute them (which + * will also add them to the cache). + * + * @param outputFile + * file to store in. + * @param indexReader + * the documents index + * @param taxonomy + * the taxonomy index + * @param facetIndexingParams + * the facet indexing parameters + * @throws IOException + * on error * @see #load(File, IndexReader, TaxonomyReader, FacetIndexingParams) - * @see #getTotalCounts(IndexReader, TaxonomyReader, FacetIndexingParams, CategoryListCache) */ public void store(File outputFile, IndexReader indexReader, TaxonomyReader taxonomy, - FacetIndexingParams facetIndexingParams, CategoryListCache clCache) throws IOException { + FacetIndexingParams facetIndexingParams) throws IOException { File parentFile = outputFile.getParentFile(); if ( ( outputFile.exists() && (!outputFile.isFile() || !outputFile.canWrite())) || @@ -207,7 +221,7 @@ ) { throw new IllegalArgumentException("Exepecting a writable file: "+outputFile); } - TotalFacetCounts tfc = getTotalCounts(indexReader, taxonomy, facetIndexingParams, clCache); + TotalFacetCounts tfc = getTotalCounts(indexReader, taxonomy, facetIndexingParams); TotalFacetCounts.storeToFile(outputFile, tfc); } Index: lucene/facet/src/java/org/apache/lucene/facet/search/aggregator/associations/AssociationFloatSumAggregator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/aggregator/associations/AssociationFloatSumAggregator.java (revision 1433611) +++ lucene/facet/src/java/org/apache/lucene/facet/search/aggregator/associations/AssociationFloatSumAggregator.java (working copy) @@ -3,7 +3,7 @@ import java.io.IOException; import org.apache.lucene.facet.associations.CategoryFloatAssociation; -import org.apache.lucene.facet.associations.FloatAssociationsPayloadIterator; +import 
org.apache.lucene.facet.associations.FloatAssociationsIterator; import org.apache.lucene.facet.index.params.CategoryListParams; import org.apache.lucene.facet.search.aggregator.Aggregator; import org.apache.lucene.index.AtomicReaderContext; @@ -37,15 +37,15 @@ protected final String field; protected final float[] sumArray; - protected final FloatAssociationsPayloadIterator associations; + protected final FloatAssociationsIterator associations; public AssociationFloatSumAggregator(float[] sumArray) throws IOException { - this(CategoryListParams.DEFAULT_TERM.field(), sumArray); + this(CategoryListParams.DEFAULT_FIELD, sumArray); } public AssociationFloatSumAggregator(String field, float[] sumArray) throws IOException { this.field = field; - associations = new FloatAssociationsPayloadIterator(field, new CategoryFloatAssociation()); + associations = new FloatAssociationsIterator(field, new CategoryFloatAssociation()); this.sumArray = sumArray; } Index: lucene/facet/src/java/org/apache/lucene/facet/search/aggregator/associations/AssociationIntSumAggregator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/aggregator/associations/AssociationIntSumAggregator.java (revision 1433611) +++ lucene/facet/src/java/org/apache/lucene/facet/search/aggregator/associations/AssociationIntSumAggregator.java (working copy) @@ -3,7 +3,7 @@ import java.io.IOException; import org.apache.lucene.facet.associations.CategoryIntAssociation; -import org.apache.lucene.facet.associations.IntAssociationsPayloadIterator; +import org.apache.lucene.facet.associations.IntAssociationsIterator; import org.apache.lucene.facet.index.params.CategoryListParams; import org.apache.lucene.facet.search.aggregator.Aggregator; import org.apache.lucene.index.AtomicReaderContext; @@ -37,15 +37,15 @@ protected final String field; protected final int[] sumArray; - protected final IntAssociationsPayloadIterator associations; + protected 
final IntAssociationsIterator associations; public AssociationIntSumAggregator(int[] sumArray) throws IOException { - this(CategoryListParams.DEFAULT_TERM.field(), sumArray); + this(CategoryListParams.DEFAULT_FIELD, sumArray); } public AssociationIntSumAggregator(String field, int[] sumArray) throws IOException { this.field = field; - associations = new IntAssociationsPayloadIterator(field, new CategoryIntAssociation()); + associations = new IntAssociationsIterator(field, new CategoryIntAssociation()); this.sumArray = sumArray; } Index: lucene/facet/src/java/org/apache/lucene/facet/search/cache/CategoryListCache.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/cache/CategoryListCache.java (revision 1433611) +++ lucene/facet/src/java/org/apache/lucene/facet/search/cache/CategoryListCache.java (working copy) @@ -1,61 +0,0 @@ -package org.apache.lucene.facet.search.cache; - -import java.io.IOException; -import java.util.HashMap; - -import org.apache.lucene.index.IndexReader; - -import org.apache.lucene.facet.index.params.CategoryListParams; -import org.apache.lucene.facet.index.params.FacetIndexingParams; -import org.apache.lucene.facet.taxonomy.TaxonomyReader; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * Cache for {@link CategoryListData}, per {@link CategoryListParams}. - * - * @lucene.experimental - */ -public class CategoryListCache { - - private HashMap - cldMap = new HashMap(); - - /** - * Fetch the cached {@link CategoryListData} for a given {@link CategoryListParams}. - */ - public CategoryListData get(CategoryListParams clp) { - return cldMap.get(clp); - } - - /** - * Register a pre-computed {@link CategoryListData}. - */ - public void register(CategoryListParams clp, CategoryListData clData) { - cldMap.put(clp,clData); - } - - /** - * Load and register {@link CategoryListData}. - */ - public void loadAndRegister(CategoryListParams clp, - IndexReader reader, TaxonomyReader taxo, FacetIndexingParams iparams) throws IOException { - CategoryListData clData = new CategoryListData(reader, taxo, iparams, clp); - register(clp,clData); - } -} Index: lucene/facet/src/java/org/apache/lucene/facet/search/cache/CategoryListData.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/cache/CategoryListData.java (revision 1433611) +++ lucene/facet/src/java/org/apache/lucene/facet/search/cache/CategoryListData.java (working copy) @@ -1,133 +0,0 @@ -package org.apache.lucene.facet.search.cache; - -import java.io.IOException; - -import org.apache.lucene.facet.index.params.CategoryListParams; -import org.apache.lucene.facet.index.params.FacetIndexingParams; -import org.apache.lucene.facet.search.CategoryListIterator; -import org.apache.lucene.facet.taxonomy.TaxonomyReader; -import org.apache.lucene.index.AtomicReaderContext; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.util.IntsRef; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * Category list data maintained in RAM. - *

- * Speeds up facets accumulation when more RAM is available. - *

- * Note that this will consume more memory: one int (4 bytes) for each category - * of each document. - *

- * Note: at the moment this class is insensitive to updates of the index, and, - * in particular, does not make use of Lucene's ability to refresh a single - * segment. - *

- * See {@link CategoryListCache#register(CategoryListParams, CategoryListData)} - * and - * {@link CategoryListCache#loadAndRegister(CategoryListParams, IndexReader, TaxonomyReader, FacetIndexingParams)}. - * - * @lucene.experimental - */ -public class CategoryListData { - - // TODO (Facet): experiment with different orders - p-d-c vs. current d-p-c. - private transient volatile int[][][] docPartitionCategories; - - /** - * Empty constructor for extensions with modified computation of the data. - */ - protected CategoryListData() { - } - - /** Compute category list data for caching for faster iteration. */ - CategoryListData(IndexReader reader, TaxonomyReader taxo, FacetIndexingParams iparams, CategoryListParams clp) - throws IOException { - - int[][][]dpf = new int[reader.maxDoc()][][]; - int numPartitions = (int)Math.ceil(taxo.getSize()/(double)iparams.getPartitionSize()); - IntsRef ordinals = new IntsRef(32); - for (int part = 0; part < numPartitions; part++) { - for (AtomicReaderContext context : reader.leaves()) { - CategoryListIterator cli = clp.createCategoryListIterator(part); - if (cli.setNextReader(context)) { - final int maxDoc = context.reader().maxDoc(); - for (int i = 0; i < maxDoc; i++) { - cli.getOrdinals(i, ordinals); - if (ordinals.length > 0) { - int doc = i + context.docBase; - if (dpf[doc] == null) { - dpf[doc] = new int[numPartitions][]; - } - if (dpf[doc][part] == null) { - dpf[doc][part] = new int[ordinals.length]; - } - for (int j = 0; j < ordinals.length; j++) { - dpf[doc][part][j] = ordinals.ints[j]; - } - } - } - } - } - } - docPartitionCategories = dpf; - } - - /** - * Iterate on the category list data for the specified partition. 
- */ - public CategoryListIterator iterator(int partition) throws IOException { - return new RAMCategoryListIterator(partition, docPartitionCategories); - } - - /** Internal: category list iterator over uncompressed category info in RAM */ - private static class RAMCategoryListIterator implements CategoryListIterator { - - private int docBase; - private final int part; - private final int[][][] dpc; - - RAMCategoryListIterator(int part, int[][][] docPartitionCategories) { - this.part = part; - dpc = docPartitionCategories; - } - - @Override - public boolean setNextReader(AtomicReaderContext context) throws IOException { - docBase = context.docBase; - return dpc != null && dpc.length > part; - } - - @Override - public void getOrdinals(int docID, IntsRef ints) throws IOException { - ints.length = 0; - docID += docBase; - if (dpc.length > docID && dpc[docID] != null && dpc[docID][part] != null) { - if (ints.ints.length < dpc[docID][part].length) { - ints.grow(dpc[docID][part].length); - } - ints.length = 0; - for (int i = 0; i < dpc[docID][part].length; i++) { - ints.ints[ints.length++] = dpc[docID][part][i]; - } - } - } - } - -} \ No newline at end of file Index: lucene/facet/src/java/org/apache/lucene/facet/search/cache/package.html =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/cache/package.html (revision 1433611) +++ lucene/facet/src/java/org/apache/lucene/facet/search/cache/package.html (working copy) @@ -1,22 +0,0 @@ - - - - -Caching to speed up facets accumulation. 
- - Index: lucene/facet/src/java/org/apache/lucene/facet/search/params/FacetRequest.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/params/FacetRequest.java (revision 1433611) +++ lucene/facet/src/java/org/apache/lucene/facet/search/params/FacetRequest.java (working copy) @@ -2,15 +2,11 @@ import java.io.IOException; -import org.apache.lucene.facet.index.params.CategoryListParams; -import org.apache.lucene.facet.search.CategoryListIterator; import org.apache.lucene.facet.search.FacetArrays; import org.apache.lucene.facet.search.FacetResultsHandler; import org.apache.lucene.facet.search.TopKFacetResultsHandler; import org.apache.lucene.facet.search.TopKInEachNodeHandler; import org.apache.lucene.facet.search.aggregator.Aggregator; -import org.apache.lucene.facet.search.cache.CategoryListCache; -import org.apache.lucene.facet.search.cache.CategoryListData; import org.apache.lucene.facet.taxonomy.CategoryPath; import org.apache.lucene.facet.taxonomy.TaxonomyReader; @@ -64,7 +60,7 @@ */ public static final ResultMode DEFAULT_RESULT_MODE = ResultMode.PER_NODE_IN_TREE; - private final CategoryPath categoryPath; + public final CategoryPath categoryPath; private final int numResults; private int numLabel; private int depth; @@ -134,17 +130,6 @@ } /** - * The root category of this facet request. The categories that are returned - * as a result of this request will all be descendants of this root. - *

- * NOTE: you should not modify the returned {@link CategoryPath}, or - * otherwise some methonds may not work properly, e.g. {@link #hashCode()}. - */ - public final CategoryPath getCategoryPath() { - return categoryPath; - } - - /** * How deeply to look under the given category. If the depth is 0, * only the category itself is counted. If the depth is 1, its immediate * children are also counted, and so on. If the depth is Integer.MAX_VALUE, @@ -160,24 +145,22 @@ * will have their category paths calculated, and the rest will only be * available as ordinals (category numbers) and will have null paths. *

- * If Integer.MAX_VALUE is specified, all - * results are labled. + * If Integer.MAX_VALUE is specified, all results are labeled. *

- * The purpose of this parameter is to avoid having to run the whole - * faceted search again when the user asks for more values for the facet; - * The application can ask (getNumResults()) for more values than it needs - * to show, but keep getNumLabel() only the number it wants to immediately - * show. The slow-down caused by finding more values is negligible, because - * the slowest part - finding the categories' paths, is avoided. + * The purpose of this parameter is to avoid having to run the whole faceted + * search again when the user asks for more values for the facet; The + * application can ask (getNumResults()) for more values than it needs to + * show, but keep getNumLabel() only the number it wants to immediately show. + * The slow-down caused by finding more values is negligible, because the + * slowest part - finding the categories' paths, is avoided. *

- * Depending on the {@link #getResultMode() LimitsMode}, - * this limit is applied globally or per results node. - * In the global mode, if this limit is 3, - * only 3 top results would be labeled. - * In the per-node mode, if this limit is 3, - * 3 top children of {@link #getCategoryPath() the target category} would be labeled, - * as well as 3 top children of each of them, and so forth, until the depth defined - * by {@link #getDepth()}. + * Depending on the {@link #getResultMode() LimitsMode}, this limit is applied + * globally or per results node. In the global mode, if this limit is 3, only + * 3 top results would be labeled. In the per-node mode, if this limit is 3, 3 + * top children of {@link #categoryPath the target category} would be labeled, + * as well as 3 top children of each of them, and so forth, until the depth + * defined by {@link #getDepth()}. + * * @see #getResultMode() */ public final int getNumLabel() { @@ -185,20 +168,18 @@ } /** - * The number of sub-categories to return (at most). - * If the sub-categories are returned. + * The number of sub-categories to return (at most). If the sub-categories are + * returned. *

- * If Integer.MAX_VALUE is specified, all - * sub-categories are returned. + * If Integer.MAX_VALUE is specified, all sub-categories are returned. *

- * Depending on the {@link #getResultMode() LimitsMode}, - * this limit is applied globally or per results node. - * In the global mode, if this limit is 3, - * only 3 top results would be computed. - * In the per-node mode, if this limit is 3, - * 3 top children of {@link #getCategoryPath() the target category} would be returned, - * as well as 3 top children of each of them, and so forth, until the depth defined - * by {@link #getDepth()}. + * Depending on the {@link #getResultMode() LimitsMode}, this limit is applied + * globally or per results node. In the global mode, if this limit is 3, only + * 3 top results would be computed. In the per-node mode, if this limit is 3, + * 3 top children of {@link #categoryPath the target category} would be + * returned, as well as 3 top children of each of them, and so forth, until + * the depth defined by {@link #getDepth()}. + * * @see #getResultMode() */ public final int getNumResults() { @@ -320,24 +301,6 @@ throws IOException; /** - * Create the category list iterator for the specified partition. If a non - * null cache is provided which contains the required data, use it for the - * iteration. - */ - public CategoryListIterator createCategoryListIterator(TaxonomyReader taxo, FacetSearchParams sParams, int partition) - throws IOException { - CategoryListCache clCache = sParams.getCategoryListCache(); - CategoryListParams clParams = sParams.getFacetIndexingParams().getCategoryListParams(categoryPath); - if (clCache != null) { - CategoryListData clData = clCache.get(clParams); - if (clData != null) { - return clData.iterator(partition); - } - } - return clParams.createCategoryListIterator(partition); - } - - /** * Return the value of a category used for facets computations for this * request. For a count request this would be the count for that facet, i.e. * an integer number. 
but for other requests this can be the result of a more Index: lucene/facet/src/java/org/apache/lucene/facet/search/params/FacetSearchParams.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/params/FacetSearchParams.java (revision 1433611) +++ lucene/facet/src/java/org/apache/lucene/facet/search/params/FacetSearchParams.java (working copy) @@ -4,7 +4,6 @@ import java.util.List; import org.apache.lucene.facet.index.params.FacetIndexingParams; -import org.apache.lucene.facet.search.cache.CategoryListCache; /* * Licensed to the Apache Software Foundation (ASF) under one or more @@ -72,14 +71,6 @@ } /** - * Returns the {@link CategoryListCache}. By default returns {@code null}, you - * should override if you want to use a cache. - */ - public CategoryListCache getCategoryListCache() { - return null; - } - - /** * Returns the {@link FacetIndexingParams} that were passed to the * constructor. */ Index: lucene/facet/src/java/org/apache/lucene/facet/search/params/associations/AssociationFloatSumFacetRequest.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/params/associations/AssociationFloatSumFacetRequest.java (revision 1433611) +++ lucene/facet/src/java/org/apache/lucene/facet/search/params/associations/AssociationFloatSumFacetRequest.java (working copy) @@ -28,7 +28,10 @@ /** * A {@link FacetRequest} for weighting facets according to their float - * association by summing the association values. + * association by summing the association values. Note that this class caches + * the associations data in-memory by default. You can override + * {@link #createAggregator(boolean, FacetArrays, TaxonomyReader)} to return an + * {@link AssociationFloatSumAggregator} which does otherwise. 
* * @lucene.experimental */ Index: lucene/facet/src/java/org/apache/lucene/facet/search/params/associations/AssociationIntSumFacetRequest.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/params/associations/AssociationIntSumFacetRequest.java (revision 1433611) +++ lucene/facet/src/java/org/apache/lucene/facet/search/params/associations/AssociationIntSumFacetRequest.java (working copy) @@ -4,6 +4,7 @@ import org.apache.lucene.facet.search.FacetArrays; import org.apache.lucene.facet.search.aggregator.Aggregator; +import org.apache.lucene.facet.search.aggregator.associations.AssociationFloatSumAggregator; import org.apache.lucene.facet.search.aggregator.associations.AssociationIntSumAggregator; import org.apache.lucene.facet.search.params.FacetRequest; import org.apache.lucene.facet.taxonomy.CategoryPath; @@ -28,7 +29,10 @@ /** * A {@link FacetRequest} for weighting facets according to their integer - * association by summing the association values. + * association by summing the association values. Note that this class caches + * the associations data in-memory by default. You can override + * {@link #createAggregator(boolean, FacetArrays, TaxonomyReader)} to return an + * {@link AssociationIntSumAggregator} which does otherwise. * * @lucene.experimental */ Index: lucene/facet/src/java/org/apache/lucene/facet/search/results/FacetResult.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/results/FacetResult.java (revision 1433611) +++ lucene/facet/src/java/org/apache/lucene/facet/search/results/FacetResult.java (working copy) @@ -39,7 +39,7 @@ /** * Facet result node matching the root of the {@link #getFacetRequest() facet request}.
* @see #getFacetRequest() - * @see FacetRequest#getCategoryPath() + * @see FacetRequest#categoryPath */ public final FacetResultNode getFacetResultNode() { return this.rootNode; Index: lucene/facet/src/java/org/apache/lucene/facet/search/sampling/Sampler.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/sampling/Sampler.java (revision 1433611) +++ lucene/facet/src/java/org/apache/lucene/facet/search/sampling/Sampler.java (working copy) @@ -4,9 +4,6 @@ import java.util.ArrayList; import java.util.List; -import org.apache.lucene.index.IndexReader; - -import org.apache.lucene.facet.search.CategoryListIterator; import org.apache.lucene.facet.search.FacetArrays; import org.apache.lucene.facet.search.ScoredDocIDs; import org.apache.lucene.facet.search.aggregator.Aggregator; @@ -16,6 +13,7 @@ import org.apache.lucene.facet.search.results.FacetResultNode; import org.apache.lucene.facet.search.results.MutableFacetResultNode; import org.apache.lucene.facet.taxonomy.TaxonomyReader; +import org.apache.lucene.index.IndexReader; /* * Licensed to the Apache Software Foundation (ASF) under one or more @@ -205,7 +203,7 @@ private static class OverSampledFacetRequest extends FacetRequest { final FacetRequest orig; public OverSampledFacetRequest(FacetRequest orig, int num) { - super(orig.getCategoryPath(), num); + super(orig.categoryPath, num); this.orig = orig; setDepth(orig.getDepth()); setNumLabel(orig.getNumLabel()); @@ -215,12 +213,6 @@ } @Override - public CategoryListIterator createCategoryListIterator(TaxonomyReader taxo, FacetSearchParams sParams, - int partition) throws IOException { - return orig.createCategoryListIterator(taxo, sParams, partition); - } - - @Override public Aggregator createAggregator(boolean useComplements, FacetArrays arrays, TaxonomyReader taxonomy) throws IOException { return orig.createAggregator(useComplements, arrays, taxonomy); Index: 
lucene/facet/src/java/org/apache/lucene/facet/util/PartitionsUtils.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/util/PartitionsUtils.java (revision 1433611) +++ lucene/facet/src/java/org/apache/lucene/facet/util/PartitionsUtils.java (working copy) @@ -28,16 +28,10 @@ */ public final class PartitionsUtils { + /** The prefix that is added to the name of the partition. */ + public static final String PART_NAME_PREFIX = "$part"; + /** - * Get the offset for a given partition. That is, what is the minimum number an - * ordinal could be for a particular partition. - */ - public final static int partitionOffset(FacetIndexingParams iParams, - int partitionNumber, final TaxonomyReader taxonomyReader) { - return partitionNumber * partitionSize(iParams, taxonomyReader); - } - - /** * Get the partition size in this parameter, or return the size of the taxonomy, which * is smaller. (Guarantees usage of as little memory as possible at search time). 
*/ @@ -58,21 +52,18 @@ /** * Partition name by category ordinal */ - public final static String partitionNameByOrdinal( - FacetIndexingParams iParams, CategoryListParams clParams, int ordinal) { + public final static String partitionNameByOrdinal(FacetIndexingParams iParams, CategoryListParams clParams, + int ordinal) { int partition = partitionNumber(iParams, ordinal); return partitionName(clParams, partition); } - /** - * Partition name by its number - */ + /** Partition name by its number */ public final static String partitionName(CategoryListParams clParams, int partition) { - String term = clParams.getTerm().text(); if (partition == 0) { - return term; // for backwards compatibility we do not add a partition number in this case + return ""; // for backwards compatibility we do not add a partition number in this case } - return term + partition; + return PART_NAME_PREFIX + Integer.toString(partition); } } Index: lucene/facet/src/test/org/apache/lucene/facet/FacetTestBase.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/FacetTestBase.java (revision 1433611) +++ lucene/facet/src/test/org/apache/lucene/facet/FacetTestBase.java (working copy) @@ -266,13 +266,12 @@ FacetIndexingParams iParams = getFacetIndexingParams(Integer.MAX_VALUE); String delim = String.valueOf(iParams.getFacetDelimChar()); Map res = new HashMap(); - HashSet handledTerms = new HashSet(); + HashSet handledTerms = new HashSet(); for (CategoryListParams clp : iParams.getAllCategoryListParams()) { - Term baseTerm = new Term(clp.getTerm().field()); - if (!handledTerms.add(baseTerm)) { + if (!handledTerms.add(clp.field)) { continue; // already handled this term (for another list) } - Terms terms = MultiFields.getTerms(indexReader, baseTerm.field()); + Terms terms = MultiFields.getTerms(indexReader, clp.field); if (terms == null) { continue; } @@ -297,7 +296,7 @@ FacetResultNode topResNode = fr.getFacetResultNode(); 
FacetRequest freq = fr.getFacetRequest(); if (VERBOSE) { - System.out.println(freq.getCategoryPath().toString()+ "\t\t" + topResNode); + System.out.println(freq.categoryPath.toString()+ "\t\t" + topResNode); } assertCountsAndCardinality(facetCountsTruth, topResNode, freq.getNumResults()); } Index: lucene/facet/src/test/org/apache/lucene/facet/FacetTestUtils.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/FacetTestUtils.java (revision 1433611) +++ lucene/facet/src/test/org/apache/lucene/facet/FacetTestUtils.java (working copy) @@ -2,14 +2,9 @@ import java.io.IOException; import java.util.ArrayList; -import java.util.Collections; import java.util.List; import org.apache.lucene.analysis.standard.StandardAnalyzer; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.document.TextField; -import org.apache.lucene.facet.index.FacetFields; import org.apache.lucene.facet.index.params.FacetIndexingParams; import org.apache.lucene.facet.search.FacetsCollector; import org.apache.lucene.facet.search.params.CountFacetRequest; @@ -23,7 +18,6 @@ import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; -import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.search.Collector; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchAllDocsQuery; @@ -116,15 +110,6 @@ return collectors; } - public static void add(FacetIndexingParams iParams, RandomIndexWriter iw, - TaxonomyWriter tw, String... 
strings) throws IOException { - Document d = new Document(); - FacetFields facetFields = new FacetFields(tw, iParams); - facetFields.addFields(d, Collections.singletonList(new CategoryPath(strings))); - d.add(new TextField("content", "alpha", Field.Store.YES)); - iw.addDocument(d); - } - public static class IndexTaxonomyReaderPair { public DirectoryReader indexReader; public DirectoryTaxonomyReader taxReader; Index: lucene/facet/src/test/org/apache/lucene/facet/index/params/CategoryListParamsTest.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/index/params/CategoryListParamsTest.java (revision 1433611) +++ lucene/facet/src/test/org/apache/lucene/facet/index/params/CategoryListParamsTest.java (working copy) @@ -1,11 +1,8 @@ package org.apache.lucene.facet.index.params; -import org.apache.lucene.index.Term; +import org.apache.lucene.util.LuceneTestCase; import org.junit.Test; -import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.facet.index.params.CategoryListParams; - /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. 
See the NOTICE file distributed with @@ -28,7 +25,7 @@ @Test public void testDefaultSettings() { CategoryListParams clp = new CategoryListParams(); - assertEquals("wrong default term", new Term("$facets", "$fulltree$"), clp.getTerm()); + assertEquals("wrong default field", "$facets", clp.field); assertEquals("unexpected default encoder", "Sorting (Unique (DGap (VInt8)))", clp.createEncoder().toString()); assertEquals("unexpected default decoder", "DGap (VInt8)", clp.createEncoder().createMatchingDecoder().toString()); } @@ -64,8 +61,8 @@ clParams1.hashCode(), clParams2.hashCode()); // Test 2 CategoryListParams with the same specified Term - clParams1 = new CategoryListParams(new Term("test")); - clParams2 = new CategoryListParams(new Term("test")); + clParams1 = new CategoryListParams("test"); + clParams2 = new CategoryListParams("test"); assertEquals( "2 CategoryListParams with the same term should equal each other.", clParams1, clParams2); @@ -73,8 +70,8 @@ clParams1.hashCode(), clParams2.hashCode()); // Test 2 CategoryListParams with DIFFERENT terms - clParams1 = new CategoryListParams(new Term("test1")); - clParams2 = new CategoryListParams(new Term("test2")); + clParams1 = new CategoryListParams("test1"); + clParams2 = new CategoryListParams("test2"); assertFalse( "2 CategoryListParams with the different terms should NOT equal each other.", clParams1.equals(clParams2)); Index: lucene/facet/src/test/org/apache/lucene/facet/index/params/FacetIndexingParamsTest.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/index/params/FacetIndexingParamsTest.java (revision 1433611) +++ lucene/facet/src/test/org/apache/lucene/facet/index/params/FacetIndexingParamsTest.java (working copy) @@ -35,8 +35,7 @@ assertNotNull("Missing default category list", dfip.getAllCategoryListParams()); assertEquals("all categories have the same CategoryListParams by default", dfip.getCategoryListParams(null), 
dfip.getCategoryListParams(new CategoryPath("a"))); - assertEquals("Expected default category list term is $facets:$fulltree$", - new Term("$facets", "$fulltree$"), dfip.getCategoryListParams(null).getTerm()); + assertEquals("Expected default category list field is $facets", "$facets", dfip.getCategoryListParams(null).field); String expectedDDText = "a" + dfip.getFacetDelimChar() + "b"; CategoryPath cp = new CategoryPath("a", "b"); @@ -48,13 +47,13 @@ assertEquals("wrong drill-down term text", expectedDDText, new String( buf, 0, numchars)); CategoryListParams clParams = dfip.getCategoryListParams(null); - assertEquals("partition for all ordinals is the first", "$fulltree$", + assertEquals("partition for all ordinals is the first", "", PartitionsUtils.partitionNameByOrdinal(dfip, clParams , 250)); assertEquals("for partition 0, the same name should be returned", - "$fulltree$", PartitionsUtils.partitionName(clParams, 0)); + "", PartitionsUtils.partitionName(clParams, 0)); assertEquals( "for any other, it's the concatenation of name + partition", - "$fulltree$1", PartitionsUtils.partitionName(clParams, 1)); + PartitionsUtils.PART_NAME_PREFIX + "1", PartitionsUtils.partitionName(clParams, 1)); assertEquals("default partition number is always 0", 0, PartitionsUtils.partitionNumber(dfip,100)); assertEquals("default partition size is unbounded", Integer.MAX_VALUE, @@ -63,11 +62,9 @@ @Test public void testCategoryListParamsWithDefaultIndexingParams() { - CategoryListParams clp = new CategoryListParams( - new Term("clp", "value")); + CategoryListParams clp = new CategoryListParams("clp"); FacetIndexingParams dfip = new FacetIndexingParams(clp); - assertEquals("Expected default category list term is " + clp.getTerm(), - clp.getTerm(), dfip.getCategoryListParams(null).getTerm()); + assertEquals("Expected default category list field is " + clp.field, clp.field, dfip.getCategoryListParams(null).field); } @Test Index: 
lucene/facet/src/test/org/apache/lucene/facet/index/params/PerDimensionIndexingParamsTest.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/index/params/PerDimensionIndexingParamsTest.java (revision 1433611) +++ lucene/facet/src/test/org/apache/lucene/facet/index/params/PerDimensionIndexingParamsTest.java (working copy) @@ -32,44 +32,31 @@ public void testTopLevelSettings() { FacetIndexingParams ifip = new PerDimensionIndexingParams(Collections.emptyMap()); assertNotNull("Missing default category list", ifip.getAllCategoryListParams()); - assertEquals( - "Expected default category list term is $facets:$fulltree$", - new Term("$facets", "$fulltree$"), ifip.getCategoryListParams( - null).getTerm()); - String expectedDDText = "a" - + ifip.getFacetDelimChar() + "b"; + assertEquals("Expected default category list field is $facets", "$facets", ifip.getCategoryListParams(null).field); + String expectedDDText = "a" + ifip.getFacetDelimChar() + "b"; CategoryPath cp = new CategoryPath("a", "b"); - assertEquals("wrong drill-down term", new Term("$facets", - expectedDDText), DrillDown.term(ifip,cp)); + assertEquals("wrong drill-down term", new Term("$facets", expectedDDText), DrillDown.term(ifip,cp)); char[] buf = new char[20]; int numchars = ifip.drillDownTermText(cp, buf); assertEquals("3 characters should be written", 3, numchars); - assertEquals("wrong drill-down term text", expectedDDText, new String( - buf, 0, numchars)); + assertEquals("wrong drill-down term text", expectedDDText, new String(buf, 0, numchars)); CategoryListParams clParams = ifip.getCategoryListParams(null); - assertEquals("partition for all ordinals is the first", "$fulltree$", - PartitionsUtils.partitionNameByOrdinal(ifip, clParams , 250)); - assertEquals("for partition 0, the same name should be returned", - "$fulltree$", PartitionsUtils.partitionName(clParams, 0)); - assertEquals( - "for any other, it's the concatenation of name 
+ partition", - "$fulltree$1", PartitionsUtils.partitionName(clParams, 1)); - assertEquals("default partition number is always 0", 0, - PartitionsUtils.partitionNumber(ifip,100)); - - assertEquals("default partition size is unbounded", Integer.MAX_VALUE, - ifip.getPartitionSize()); + assertEquals("partition for all ordinals is the first", "", PartitionsUtils.partitionNameByOrdinal(ifip, clParams , 250)); + assertEquals("for partition 0, the same name should be returned", "", PartitionsUtils.partitionName(clParams, 0)); + assertEquals("for any other, it's the concatenation of name + partition", PartitionsUtils.PART_NAME_PREFIX + "1", PartitionsUtils.partitionName(clParams, 1)); + assertEquals("default partition number is always 0", 0, PartitionsUtils.partitionNumber(ifip,100)); + assertEquals("default partition size is unbounded", Integer.MAX_VALUE, ifip.getPartitionSize()); } @Test public void testCategoryListParamsAddition() { - CategoryListParams clp = new CategoryListParams(new Term("clp", "value")); + CategoryListParams clp = new CategoryListParams("clp"); PerDimensionIndexingParams tlfip = new PerDimensionIndexingParams( Collections. 
singletonMap(new CategoryPath("a"), clp)); - assertEquals("Expected category list term is " + clp.getTerm(), - clp.getTerm(), tlfip.getCategoryListParams(new CategoryPath("a")).getTerm()); - assertNotSame("Unexpected default category list " + clp.getTerm(), clp, tlfip.getCategoryListParams(null)); + assertEquals("Expected category list field is " + clp.field, + clp.field, tlfip.getCategoryListParams(new CategoryPath("a")).field); + assertNotSame("Unexpected default category list " + clp.field, clp, tlfip.getCategoryListParams(null)); } } \ No newline at end of file Index: lucene/facet/src/test/org/apache/lucene/facet/search/CategoryListIteratorTest.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/search/CategoryListIteratorTest.java (revision 1433611) +++ lucene/facet/src/test/org/apache/lucene/facet/search/CategoryListIteratorTest.java (working copy) @@ -1,23 +1,15 @@ package org.apache.lucene.facet.search; -import java.io.IOException; -import java.io.Reader; import java.util.HashSet; import java.util.Set; -import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.MockTokenizer; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.document.TextField; +import org.apache.lucene.document.StraightBytesDocValuesField; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.RandomIndexWriter; -import org.apache.lucene.index.Term; import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IntsRef; @@ -48,42 +40,6 @@ public class CategoryListIteratorTest extends 
LuceneTestCase { - private static final class DataTokenStream extends TokenStream { - - private final PayloadAttribute payload = addAttribute(PayloadAttribute.class); - private final BytesRef buf; - private final IntEncoder encoder; - private final CharTermAttribute term = addAttribute(CharTermAttribute.class); - - private int idx; - private boolean exhausted = false; - - public DataTokenStream(String text, IntEncoder encoder) { - this.encoder = encoder; - term.setEmpty().append(text); - buf = new BytesRef(); - payload.setPayload(buf); - } - - public void setIdx(int idx) { - this.idx = idx; - exhausted = false; - } - - @Override - public boolean incrementToken() throws IOException { - if (exhausted) { - return false; - } - - // must copy because encoders may change the buffer - encoder.encode(IntsRef.deepCopyOf(data[idx]), buf); - exhausted = true; - return true; - } - - } - static final IntsRef[] data = new IntsRef[] { new IntsRef(new int[] { 1, 2 }, 0, 2), new IntsRef(new int[] { 3, 4 }, 0, 2), @@ -95,13 +51,13 @@ public void testPayloadCategoryListIteraor() throws Exception { Directory dir = newDirectory(); final IntEncoder encoder = new SortingIntEncoder(new UniqueValuesIntEncoder(new DGapIntEncoder(new VInt8IntEncoder()))); - DataTokenStream dts = new DataTokenStream("1",encoder); RandomIndexWriter writer = new RandomIndexWriter(random(), dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random(), MockTokenizer.KEYWORD, false)).setMergePolicy(newLogMergePolicy())); + BytesRef buf = new BytesRef(); for (int i = 0; i < data.length; i++) { - dts.setIdx(i); Document doc = new Document(); - doc.add(new TextField("f", dts)); + encoder.encode(IntsRef.deepCopyOf(data[i]), buf); + doc.add(new StraightBytesDocValuesField("f", buf)); writer.addDocument(doc); } IndexReader reader = writer.getReader(); @@ -109,9 +65,9 @@ int totalCategories = 0; IntsRef ordinals = new IntsRef(); - CategoryListIterator cli = new PayloadCategoryListIteraor(new Term("f","1"), 
encoder.createMatchingDecoder()); + CategoryListIterator cli = new DocValuesCategoryListIterator("f", encoder.createMatchingDecoder()); for (AtomicReaderContext context : reader.leaves()) { - cli.setNextReader(context); + assertTrue("failed to initialize iterator", cli.setNextReader(context)); int maxDoc = context.reader().maxDoc(); int dataIdx = context.docBase; for (int doc = 0; doc < maxDoc; doc++, dataIdx++) { @@ -136,24 +92,17 @@ public void testPayloadIteratorWithInvalidDoc() throws Exception { Directory dir = newDirectory(); final IntEncoder encoder = new SortingIntEncoder(new UniqueValuesIntEncoder(new DGapIntEncoder(new VInt8IntEncoder()))); - DataTokenStream dts = new DataTokenStream("1", encoder); - // this test requires that no payloads ever be randomly present! - final Analyzer noPayloadsAnalyzer = new Analyzer() { - @Override - public TokenStreamComponents createComponents(String fieldName, Reader reader) { - return new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.KEYWORD, false)); - } - }; // NOTE: test is wired to LogMP... because test relies on certain docids having payloads RandomIndexWriter writer = new RandomIndexWriter(random(), dir, - newIndexWriterConfig(TEST_VERSION_CURRENT, noPayloadsAnalyzer).setMergePolicy(newLogMergePolicy())); + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy())); for (int i = 0; i < data.length; i++) { Document doc = new Document(); if (i == 0) { - dts.setIdx(i); - doc.add(new TextField("f", dts)); // only doc 0 has payloads! 
+ BytesRef buf = new BytesRef(); + encoder.encode(IntsRef.deepCopyOf(data[i]), buf ); + doc.add(new StraightBytesDocValuesField("f", buf)); } else { - doc.add(new TextField("f", "1", Field.Store.NO)); + doc.add(new StraightBytesDocValuesField("f", new BytesRef())); } writer.addDocument(doc); writer.commit(); @@ -164,9 +113,9 @@ int totalCategories = 0; IntsRef ordinals = new IntsRef(); - CategoryListIterator cli = new PayloadCategoryListIteraor(new Term("f","1"), encoder.createMatchingDecoder()); + CategoryListIterator cli = new DocValuesCategoryListIterator("f", encoder.createMatchingDecoder()); for (AtomicReaderContext context : reader.leaves()) { - cli.setNextReader(context); + assertTrue("failed to initialize iterator", cli.setNextReader(context)); int maxDoc = context.reader().maxDoc(); int dataIdx = context.docBase; for (int doc = 0; doc < maxDoc; doc++, dataIdx++) { @@ -176,13 +125,13 @@ } cli.getOrdinals(doc, ordinals); if (dataIdx == 0) { - assertTrue("document 0 must have a payload", ordinals.length > 0); + assertTrue("document 0 must have ordinals", ordinals.length > 0); for (int j = 0; j < ordinals.length; j++) { assertTrue("expected category not found: " + ordinals.ints[j], values.contains(ordinals.ints[j])); } totalCategories += ordinals.length; } else { - assertTrue("only document 0 should have a payload", ordinals.length == 0); + assertTrue("only document 0 should have ordinals", ordinals.length == 0); } } } Index: lucene/facet/src/test/org/apache/lucene/facet/search/DrillDownTest.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/search/DrillDownTest.java (revision 1433611) +++ lucene/facet/src/test/org/apache/lucene/facet/search/DrillDownTest.java (working copy) @@ -60,8 +60,8 @@ public DrillDownTest() { Map paramsMap = new HashMap(); - paramsMap.put(new CategoryPath("a"), new CategoryListParams(new Term("testing_facets_a", "a"))); - paramsMap.put(new CategoryPath("b"), new 
CategoryListParams(new Term("testing_facets_b", "b"))); + paramsMap.put(new CategoryPath("a"), new CategoryListParams("testing_facets_a")); + paramsMap.put(new CategoryPath("b"), new CategoryListParams("testing_facets_b")); nonDefaultParams = new PerDimensionIndexingParams(paramsMap); } @@ -113,8 +113,8 @@ } @Test - public void testTermDefault() { - String defaultField = CategoryListParams.DEFAULT_TERM.field(); + public void testDefaultField() { + String defaultField = CategoryListParams.DEFAULT_FIELD; Term termA = DrillDown.term(defaultParams, new CategoryPath("a")); assertEquals(new Term(defaultField, "a"), termA); Index: lucene/facet/src/test/org/apache/lucene/facet/search/TestCategoryListCache.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/search/TestCategoryListCache.java (revision 1433611) +++ lucene/facet/src/test/org/apache/lucene/facet/search/TestCategoryListCache.java (working copy) @@ -1,145 +0,0 @@ -package org.apache.lucene.facet.search; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; - -import org.apache.lucene.search.MatchAllDocsQuery; -import org.apache.lucene.util.IntsRef; - -import org.junit.After; -import org.junit.Before; -import org.junit.Test; - -import org.apache.lucene.facet.FacetTestBase; -import org.apache.lucene.facet.index.params.CategoryListParams; -import org.apache.lucene.facet.index.params.FacetIndexingParams; -import org.apache.lucene.facet.search.cache.CategoryListCache; -import org.apache.lucene.facet.search.cache.CategoryListData; -import org.apache.lucene.facet.search.params.CountFacetRequest; -import org.apache.lucene.facet.search.params.FacetRequest; -import org.apache.lucene.facet.search.params.FacetSearchParams; -import org.apache.lucene.facet.search.results.FacetResult; -import org.apache.lucene.facet.taxonomy.CategoryPath; -import org.apache.lucene.index.AtomicReaderContext; - -/* - * 
Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -public class TestCategoryListCache extends FacetTestBase { - - public TestCategoryListCache() { - super(); - } - - @Before - @Override - public void setUp() throws Exception { - super.setUp(); - initIndex(); - } - - @After - @Override - public void tearDown() throws Exception { - closeAll(); - super.tearDown(); - } - - @Test - public void testNoClCache() throws Exception { - doTest(false,false); - } - - @Test - public void testCorrectClCache() throws Exception { - doTest(true,false); - } - - @Test - public void testWrongClCache() throws Exception { - doTest(true,true); - } - - private void doTest(boolean withCache, boolean plantWrongData) throws Exception { - Map truth = facetCountsTruth(); - CategoryPath cp = (CategoryPath) truth.keySet().toArray()[0]; // any category path will do for this test - FacetIndexingParams iParams = FacetIndexingParams.ALL_PARENTS; - final CategoryListCache clCache; - if (withCache) { - //let's use a cached cl data - CategoryListParams clp = new CategoryListParams(); // default term ok as only single list - clCache = new CategoryListCache(); - clCache.loadAndRegister(clp, indexReader, taxoReader, iParams); - if (plantWrongData) { - // let's mess 
up the cached data and then expect a wrong result... - messCachedData(clCache, clp); - } - } else { - clCache = null; - } - List req = new ArrayList(); - req.add(new CountFacetRequest(cp, 10)); - final FacetSearchParams sParams = new FacetSearchParams(req, iParams) { - @Override - public CategoryListCache getCategoryListCache() { - return clCache; - } - }; - FacetsCollector fc = new FacetsCollector(sParams, indexReader, taxoReader); - searcher.search(new MatchAllDocsQuery(), fc); - List res = fc.getFacetResults(); - try { - assertCountsAndCardinality(truth, res); - assertFalse("Correct results not expected when wrong data was cached", plantWrongData); - } catch (Throwable e) { - assertTrue("Wrong results not expected unless wrong data was cached", withCache); - assertTrue("Wrong results not expected unless wrong data was cached", plantWrongData); - } - } - - /** Mess the cached data for this {@link CategoryListParams} */ - private void messCachedData(CategoryListCache clCache, CategoryListParams clp) { - final CategoryListData cld = clCache.get(clp); - CategoryListData badCld = new CategoryListData() { - @Override - public CategoryListIterator iterator(int partition) throws IOException { - final CategoryListIterator it = cld.iterator(partition); - return new CategoryListIterator() { - @Override - public void getOrdinals(int docID, IntsRef ints) throws IOException { - it.getOrdinals(docID, ints); - for (int i = 0; i < ints.length; i++) { - if (ints.ints[i] > 1) { - ints.ints[i]--; - } else { - ints.ints[i]++; - } - } - } - @Override - public boolean setNextReader(AtomicReaderContext context) throws IOException { - return it.setNextReader(context); - } - }; - } - }; - clCache.register(clp, badCld); - } - -} Index: lucene/facet/src/test/org/apache/lucene/facet/search/TestMultipleCategoryLists.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/search/TestMultipleCategoryLists.java (revision 
1433611) +++ lucene/facet/src/test/org/apache/lucene/facet/search/TestMultipleCategoryLists.java (working copy) @@ -10,14 +10,18 @@ import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.MockTokenizer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.TextField; import org.apache.lucene.facet.FacetTestUtils; +import org.apache.lucene.facet.index.FacetFields; import org.apache.lucene.facet.index.params.CategoryListParams; import org.apache.lucene.facet.index.params.FacetIndexingParams; import org.apache.lucene.facet.index.params.PerDimensionIndexingParams; import org.apache.lucene.facet.search.params.CountFacetRequest; import org.apache.lucene.facet.search.params.FacetRequest; +import org.apache.lucene.facet.search.params.FacetRequest.ResultMode; import org.apache.lucene.facet.search.params.FacetSearchParams; -import org.apache.lucene.facet.search.params.FacetRequest.ResultMode; import org.apache.lucene.facet.search.results.FacetResult; import org.apache.lucene.facet.search.results.FacetResultNode; import org.apache.lucene.facet.taxonomy.CategoryPath; @@ -25,23 +29,19 @@ import org.apache.lucene.facet.taxonomy.TaxonomyWriter; import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader; import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter; -import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.index.AtomicReader; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriterConfig.OpenMode; -import org.apache.lucene.index.MultiFields; import org.apache.lucene.index.RandomIndexWriter; -import org.apache.lucene.index.Term; -import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.MultiCollector; import 
org.apache.lucene.search.Query; import org.apache.lucene.search.TopScoreDocCollector; import org.apache.lucene.store.Directory; -import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.util._TestUtil; import org.junit.Test; /* @@ -63,6 +63,18 @@ public class TestMultipleCategoryLists extends LuceneTestCase { + private static final CategoryPath[] CATEGORIES = new CategoryPath[] { + new CategoryPath("Author", "Mark Twain"), + new CategoryPath("Author", "Stephen King"), + new CategoryPath("Author", "Kurt Vonnegut"), + new CategoryPath("Band", "Rock & Pop", "The Beatles"), + new CategoryPath("Band", "Punk", "The Ramones"), + new CategoryPath("Band", "Rock & Pop", "U2"), + new CategoryPath("Band", "Rock & Pop", "REM"), + new CategoryPath("Band", "Rock & Pop", "Dave Matthews Band"), + new CategoryPath("Composer", "Bach"), + }; + @Test public void testDefault() throws Exception { Directory[][] dirs = getDirs(); @@ -72,9 +84,6 @@ // create and open a taxonomy writer TaxonomyWriter tw = new DirectoryTaxonomyWriter(dirs[0][1], OpenMode.CREATE); - /** - * Configure with no custom counting lists - */ PerDimensionIndexingParams iParams = new PerDimensionIndexingParams(Collections.emptyMap()); seedIndex(iw, tw, iParams); @@ -88,19 +97,14 @@ // prepare searcher to search against IndexSearcher searcher = newSearcher(ir); - FacetsCollector facetsCollector = performSearch(iParams, tr, ir, - searcher); + FacetsCollector facetsCollector = performSearch(iParams, tr, ir, searcher); // Obtain facets results and hand-test them assertCorrectResults(facetsCollector); - DocsEnum td = _TestUtil.docs(random(), ir, "$facets", new BytesRef("$fulltree$"), MultiFields.getLiveDocs(ir), null, DocsEnum.FLAG_NONE); - assertTrue(td.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); + assertOrdinalsExist("$facets", ir); - tr.close(); - ir.close(); - iw.close(); - tw.close(); + IOUtils.close(tr, ir, iw, tw); 
IOUtils.close(dirs[0]); } @@ -111,12 +115,10 @@ RandomIndexWriter iw = new RandomIndexWriter(random(), dirs[0][0], newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false))); // create and open a taxonomy writer - TaxonomyWriter tw = new DirectoryTaxonomyWriter(dirs[0][1], - OpenMode.CREATE); + TaxonomyWriter tw = new DirectoryTaxonomyWriter(dirs[0][1], OpenMode.CREATE); PerDimensionIndexingParams iParams = new PerDimensionIndexingParams( - Collections.singletonMap(new CategoryPath("Author"), - new CategoryListParams(new Term("$author", "Authors")))); + Collections.singletonMap(new CategoryPath("Author"), new CategoryListParams("$author"))); seedIndex(iw, tw, iParams); IndexReader ir = iw.getReader(); @@ -133,13 +135,10 @@ // Obtain facets results and hand-test them assertCorrectResults(facetsCollector); - assertPostingListExists("$facets", "$fulltree$", ir); - assertPostingListExists("$author", "Authors", ir); + assertOrdinalsExist("$facets", ir); + assertOrdinalsExist("$author", ir); - tr.close(); - ir.close(); - iw.close(); - tw.close(); + IOUtils.close(tr, ir, iw, tw); IOUtils.close(dirs[0]); } @@ -150,12 +149,11 @@ RandomIndexWriter iw = new RandomIndexWriter(random(), dirs[0][0], newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false))); // create and open a taxonomy writer - TaxonomyWriter tw = new DirectoryTaxonomyWriter(dirs[0][1], - OpenMode.CREATE); + TaxonomyWriter tw = new DirectoryTaxonomyWriter(dirs[0][1], OpenMode.CREATE); Map paramsMap = new HashMap(); - paramsMap.put(new CategoryPath("Band"), new CategoryListParams(new Term("$music", "Bands"))); - paramsMap.put(new CategoryPath("Composer"), new CategoryListParams(new Term("$music", "Composers"))); + paramsMap.put(new CategoryPath("Band"), new CategoryListParams("$music")); + paramsMap.put(new CategoryPath("Composer"), new CategoryListParams("$music")); PerDimensionIndexingParams iParams = new 
PerDimensionIndexingParams(paramsMap); seedIndex(iw, tw, iParams); @@ -168,26 +166,27 @@ // prepare searcher to search against IndexSearcher searcher = newSearcher(ir); - FacetsCollector facetsCollector = performSearch(iParams, tr, ir, - searcher); + FacetsCollector facetsCollector = performSearch(iParams, tr, ir, searcher); // Obtain facets results and hand-test them assertCorrectResults(facetsCollector); - assertPostingListExists("$facets", "$fulltree$", ir); - assertPostingListExists("$music", "Bands", ir); - assertPostingListExists("$music", "Composers", ir); + assertOrdinalsExist("$facets", ir); + assertOrdinalsExist("$music", ir); + assertOrdinalsExist("$music", ir); - tr.close(); - ir.close(); - iw.close(); - tw.close(); + IOUtils.close(tr, ir, iw, tw); IOUtils.close(dirs[0]); } - private void assertPostingListExists(String field, String text, IndexReader ir) throws IOException { - DocsEnum de = _TestUtil.docs(random(), ir, field, new BytesRef(text), null, null, DocsEnum.FLAG_NONE); - assertTrue(de.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); + private void assertOrdinalsExist(String field, IndexReader ir) throws IOException { + for (AtomicReaderContext context : ir.leaves()) { + AtomicReader r = context.reader(); + if (r.docValues(field) != null) { + return; // not all segments must have this DocValues + } + } + fail("no ordinals found for " + field); } @Test @@ -200,8 +199,8 @@ TaxonomyWriter tw = new DirectoryTaxonomyWriter(dirs[0][1], OpenMode.CREATE); Map paramsMap = new HashMap(); - paramsMap.put(new CategoryPath("Band"), new CategoryListParams(new Term("$bands", "Bands"))); - paramsMap.put(new CategoryPath("Composer"), new CategoryListParams(new Term("$composers", "Composers"))); + paramsMap.put(new CategoryPath("Band"), new CategoryListParams("$bands")); + paramsMap.put(new CategoryPath("Composer"), new CategoryListParams("$composers")); PerDimensionIndexingParams iParams = new PerDimensionIndexingParams(paramsMap); seedIndex(iw, tw, iParams); @@ 
-214,18 +213,15 @@ // prepare searcher to search against IndexSearcher searcher = newSearcher(ir); - FacetsCollector facetsCollector = performSearch(iParams, tr, ir, - searcher); + FacetsCollector facetsCollector = performSearch(iParams, tr, ir, searcher); // Obtain facets results and hand-test them assertCorrectResults(facetsCollector); - assertPostingListExists("$facets", "$fulltree$", ir); - assertPostingListExists("$bands", "Bands", ir); - assertPostingListExists("$composers", "Composers", ir); - tr.close(); - ir.close(); - iw.close(); - tw.close(); + assertOrdinalsExist("$facets", ir); + assertOrdinalsExist("$bands", ir); + assertOrdinalsExist("$composers", ir); + + IOUtils.close(tr, ir, iw, tw); IOUtils.close(dirs[0]); } @@ -236,13 +232,12 @@ RandomIndexWriter iw = new RandomIndexWriter(random(), dirs[0][0], newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false))); // create and open a taxonomy writer - TaxonomyWriter tw = new DirectoryTaxonomyWriter(dirs[0][1], - OpenMode.CREATE); + TaxonomyWriter tw = new DirectoryTaxonomyWriter(dirs[0][1], OpenMode.CREATE); Map paramsMap = new HashMap(); - paramsMap.put(new CategoryPath("Band"), new CategoryListParams(new Term("$music", "music"))); - paramsMap.put(new CategoryPath("Composer"), new CategoryListParams(new Term("$music", "music"))); - paramsMap.put(new CategoryPath("Author"), new CategoryListParams(new Term("$literature", "Authors"))); + paramsMap.put(new CategoryPath("Band"), new CategoryListParams("$music")); + paramsMap.put(new CategoryPath("Composer"), new CategoryListParams("$music")); + paramsMap.put(new CategoryPath("Author"), new CategoryListParams("$literature")); PerDimensionIndexingParams iParams = new PerDimensionIndexingParams(paramsMap); seedIndex(iw, tw, iParams); @@ -256,18 +251,14 @@ // prepare searcher to search against IndexSearcher searcher = newSearcher(ir); - FacetsCollector facetsCollector = performSearch(iParams, tr, ir, - searcher); + 
FacetsCollector facetsCollector = performSearch(iParams, tr, ir, searcher); // Obtain facets results and hand-test them assertCorrectResults(facetsCollector); - assertPostingListExists("$music", "music", ir); - assertPostingListExists("$literature", "Authors", ir); + assertOrdinalsExist("$music", ir); + assertOrdinalsExist("$literature", ir); - tr.close(); - ir.close(); - iw.close(); - tw.close(); + IOUtils.close(tr, ir, iw, tw); IOUtils.close(dirs[0]); } @@ -275,14 +266,12 @@ return FacetTestUtils.createIndexTaxonomyDirs(1); } - private void assertCorrectResults(FacetsCollector facetsCollector) - throws IOException { + private void assertCorrectResults(FacetsCollector facetsCollector) throws IOException { List res = facetsCollector.getFacetResults(); FacetResult results = res.get(0); FacetResultNode resNode = results.getFacetResultNode(); - Iterable subResults = resNode - .getSubResults(); + Iterable subResults = resNode.getSubResults(); Iterator subIter = subResults.iterator(); checkResult(resNode, "Band", 5.0); @@ -325,9 +314,8 @@ checkResult(subIter.next(), "Band/Rock & Pop/The Beatles", 1.0); } - private FacetsCollector performSearch(FacetIndexingParams iParams, - TaxonomyReader tr, IndexReader ir, - IndexSearcher searcher) throws IOException { + private FacetsCollector performSearch(FacetIndexingParams iParams, TaxonomyReader tr, IndexReader ir, + IndexSearcher searcher) throws IOException { // step 1: collect matching documents into a collector Query q = new MatchAllDocsQuery(); TopScoreDocCollector topDocsCollector = TopScoreDocCollector.create(10, true); @@ -344,7 +332,6 @@ // Faceted search parameters indicate which facets are we interested in FacetSearchParams facetSearchParams = new FacetSearchParams(facetRequests, iParams); - // perform documents search and facets accumulation FacetsCollector facetsCollector = new FacetsCollector(facetSearchParams, ir, tr); @@ -352,27 +339,19 @@ return facetsCollector; } - private void seedIndex(RandomIndexWriter iw, 
TaxonomyWriter tw, - FacetIndexingParams iParams) throws IOException { - FacetTestUtils.add(iParams, iw, tw, "Author", "Mark Twain"); - FacetTestUtils.add(iParams, iw, tw, "Author", "Stephen King"); - FacetTestUtils.add(iParams, iw, tw, "Author", "Kurt Vonnegut"); - FacetTestUtils.add(iParams, iw, tw, "Band", "Rock & Pop", - "The Beatles"); - FacetTestUtils.add(iParams, iw, tw, "Band", "Punk", "The Ramones"); - FacetTestUtils.add(iParams, iw, tw, "Band", "Rock & Pop", "U2"); - FacetTestUtils.add(iParams, iw, tw, "Band", "Rock & Pop", "REM"); - FacetTestUtils.add(iParams, iw, tw, "Band", "Rock & Pop", - "Dave Matthews Band"); - FacetTestUtils.add(iParams, iw, tw, "Composer", "Bach"); + private void seedIndex(RandomIndexWriter iw, TaxonomyWriter tw, FacetIndexingParams iParams) throws IOException { + FacetFields facetFields = new FacetFields(tw, iParams); + for (CategoryPath cp : CATEGORIES) { + Document doc = new Document(); + facetFields.addFields(doc, Collections.singletonList(cp)); + doc.add(new TextField("content", "alpha", Field.Store.YES)); + iw.addDocument(doc); + } } private static void checkResult(FacetResultNode sub, String label, double value) { - assertEquals("Label of subresult " + sub.getLabel() + " was incorrect", - label, sub.getLabel().toString()); - assertEquals( - "Value for " + sub.getLabel() + " subresult was incorrect", - value, sub.getValue(), 0.0); + assertEquals("Label of subresult " + sub.getLabel() + " was incorrect", label, sub.getLabel().toString()); + assertEquals("Value for " + sub.getLabel() + " subresult was incorrect", value, sub.getValue(), 0.0); } } \ No newline at end of file Index: lucene/facet/src/test/org/apache/lucene/facet/search/TestStandardFacetsAccumulator.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/search/TestStandardFacetsAccumulator.java (revision 1433611) +++ 
lucene/facet/src/test/org/apache/lucene/facet/search/TestStandardFacetsAccumulator.java (working copy) @@ -93,6 +93,7 @@ indexTwoDocs(indexWriter, null, false); // 4th segment, no content, or categories indexTwoDocs(indexWriter, null, true); // 5th segment, with content, no categories indexTwoDocs(indexWriter, facetFields, true); // 6th segment, with content, with categories + indexTwoDocs(indexWriter, null, true); // 7th segment, with content, no categories IOUtils.close(indexWriter, taxoWriter); DirectoryReader indexReader = DirectoryReader.open(indexDir); Index: lucene/facet/src/test/org/apache/lucene/facet/search/TestTopKInEachNodeResultHandler.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/search/TestTopKInEachNodeResultHandler.java (revision 1433611) +++ lucene/facet/src/test/org/apache/lucene/facet/search/TestTopKInEachNodeResultHandler.java (working copy) @@ -178,7 +178,7 @@ } FacetResult fr = facetResults.get(0); // a, depth=3, K=2 - boolean hasDoctor = "Doctor".equals(fr.getFacetRequest().getCategoryPath().components[0]); + boolean hasDoctor = "Doctor".equals(fr.getFacetRequest().categoryPath.components[0]); assertEquals(9, fr.getNumValidDescendants()); FacetResultNode parentRes = fr.getFacetResultNode(); assertEquals(16.0, parentRes.getValue(), Double.MIN_VALUE); @@ -219,7 +219,7 @@ } fr = facetResults.get(1); // a, depth=2, K=2. 
same result as before - hasDoctor |= "Doctor".equals(fr.getFacetRequest().getCategoryPath().components[0]); + hasDoctor |= "Doctor".equals(fr.getFacetRequest().categoryPath.components[0]); assertEquals(9, fr.getNumValidDescendants()); parentRes = fr.getFacetResultNode(); assertEquals(16.0, parentRes.getValue(), Double.MIN_VALUE); @@ -239,7 +239,7 @@ } fr = facetResults.get(2); // a, depth=1, K=2 - hasDoctor |= "Doctor".equals(fr.getFacetRequest().getCategoryPath().components[0]); + hasDoctor |= "Doctor".equals(fr.getFacetRequest().categoryPath.components[0]); assertEquals(4, fr.getNumValidDescendants(), 4); parentRes = fr.getFacetResultNode(); assertEquals(16.0, parentRes.getValue(), Double.MIN_VALUE); @@ -257,7 +257,7 @@ } fr = facetResults.get(3); // a/b, depth=3, K=2 - hasDoctor |= "Doctor".equals(fr.getFacetRequest().getCategoryPath().components[0]); + hasDoctor |= "Doctor".equals(fr.getFacetRequest().categoryPath.components[0]); assertEquals(4, fr.getNumValidDescendants()); parentRes = fr.getFacetResultNode(); assertEquals(8.0, parentRes.getValue(), Double.MIN_VALUE); @@ -272,7 +272,7 @@ } fr = facetResults.get(4); // a/b, depth=2, K=2 - hasDoctor |= "Doctor".equals(fr.getFacetRequest().getCategoryPath().components[0]); + hasDoctor |= "Doctor".equals(fr.getFacetRequest().categoryPath.components[0]); assertEquals(4, fr.getNumValidDescendants()); parentRes = fr.getFacetResultNode(); assertEquals(8.0, parentRes.getValue(), Double.MIN_VALUE); @@ -286,7 +286,7 @@ } fr = facetResults.get(5); // a/b, depth=1, K=2 - hasDoctor |= "Doctor".equals(fr.getFacetRequest().getCategoryPath().components[0]); + hasDoctor |= "Doctor".equals(fr.getFacetRequest().categoryPath.components[0]); assertEquals(4, fr.getNumValidDescendants()); parentRes = fr.getFacetResultNode(); assertEquals(8.0, parentRes.getValue(), Double.MIN_VALUE); @@ -300,13 +300,13 @@ } fr = facetResults.get(6); // a/b, depth=0, K=2 - hasDoctor |= 
"Doctor".equals(fr.getFacetRequest().getCategoryPath().components[0]); + hasDoctor |= "Doctor".equals(fr.getFacetRequest().categoryPath.components[0]); assertEquals(0, fr.getNumValidDescendants()); // 0 descendants but rootnode parentRes = fr.getFacetResultNode(); assertEquals(8.0, parentRes.getValue(), Double.MIN_VALUE); assertEquals(0.0, parentRes.getResidue(), Double.MIN_VALUE); assertEquals(0, parentRes.getNumSubResults()); - hasDoctor |= "Doctor".equals(fr.getFacetRequest().getCategoryPath().components[0]); + hasDoctor |= "Doctor".equals(fr.getFacetRequest().categoryPath.components[0]); // doctor, depth=1, K=2 assertFalse("Shouldn't have found anything for a FacetRequest " + Index: lucene/facet/src/test/org/apache/lucene/facet/search/TestTotalFacetCounts.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/search/TestTotalFacetCounts.java (revision 1433611) +++ lucene/facet/src/test/org/apache/lucene/facet/search/TestTotalFacetCounts.java (working copy) @@ -85,12 +85,12 @@ TotalFacetCountsCache tfcc = TotalFacetCountsCache.getSingleton(); File tmpFile = _TestUtil.createTempFile("test", "tmp", TEMP_DIR); - tfcc.store(tmpFile, readers[0].indexReader, readers[0].taxReader, iParams, null); + tfcc.store(tmpFile, readers[0].indexReader, readers[0].taxReader, iParams); tfcc.clear(); // not really required because TFCC overrides on load(), but in the test we need not rely on this. 
tfcc.load(tmpFile, readers[0].indexReader, readers[0].taxReader, iParams); // now retrieve the one just loaded - TotalFacetCounts totalCounts = tfcc.getTotalCounts(readers[0].indexReader, readers[0].taxReader, iParams, null); + TotalFacetCounts totalCounts = tfcc.getTotalCounts(readers[0].indexReader, readers[0].taxReader, iParams); int partition = 0; for (int i=0; i clps = new HashMap(); for (String dim : dimensions) { CategoryPath cp = new CategoryPath(dim); - CategoryListParams clp = new CategoryListParams(new Term("$" + dim, CategoryListParams.DEFAULT_TERM.bytes())); + CategoryListParams clp = new CategoryListParams("$" + dim); clps.put(cp, clp); } PerDimensionIndexingParams indexingParams = new PerDimensionIndexingParams(clps); @@ -86,23 +84,13 @@ IOUtils.close(indexWriter, taxoWriter); // test the multi iterator - CategoryListCache clCache = null; - if (random.nextBoolean()) { - clCache = new CategoryListCache(); - } - DirectoryReader indexReader = DirectoryReader.open(indexDir); TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir); CategoryListIterator[] iterators = new CategoryListIterator[numDimensions]; for (int i = 0; i < iterators.length; i++) { CategoryListParams clp = indexingParams.getCategoryListParams(new CategoryPath(dimensions[i])); IntDecoder decoder = clp.createEncoder().createMatchingDecoder(); - if (clCache != null && random.nextBoolean()) { - clCache.loadAndRegister(clp, indexReader, taxoReader, indexingParams); - iterators[i] = clCache.get(clp).iterator(0); // no partitions - } else { - iterators[i] = new PayloadCategoryListIteraor(clp.getTerm(), decoder); - } + iterators[i] = new DocValuesCategoryListIterator(clp.field, decoder); } MultiCategoryListIterator cli = new MultiCategoryListIterator(iterators); for (AtomicReaderContext context : indexReader.leaves()) {