Index: lucene/facet/src/examples/org/apache/lucene/facet/example/merge/TaxonomyMergeUtils.java =================================================================== --- lucene/facet/src/examples/org/apache/lucene/facet/example/merge/TaxonomyMergeUtils.java (revision 1433426) +++ lucene/facet/src/examples/org/apache/lucene/facet/example/merge/TaxonomyMergeUtils.java (working copy) @@ -90,8 +90,9 @@ DirectoryReader reader = DirectoryReader.open(srcIndexDir, -1); List leaves = reader.leaves(); - AtomicReader wrappedLeaves[] = new AtomicReader[leaves.size()]; - for (int i = 0; i < leaves.size(); i++) { + int numReaders = leaves.size(); + AtomicReader wrappedLeaves[] = new AtomicReader[numReaders]; + for (int i = 0; i < numReaders; i++) { wrappedLeaves[i] = new OrdinalMappingAtomicReader(leaves.get(i).reader(), ordinalMap, params); } try { Index: lucene/facet/src/examples/org/apache/lucene/facet/example/multiCL/MultiCLIndexer.java =================================================================== --- lucene/facet/src/examples/org/apache/lucene/facet/example/multiCL/MultiCLIndexer.java (revision 1433426) +++ lucene/facet/src/examples/org/apache/lucene/facet/example/multiCL/MultiCLIndexer.java (working copy) @@ -20,7 +20,6 @@ import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexWriterConfig.OpenMode; -import org.apache.lucene.index.Term; import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMDirectory; @@ -80,12 +79,12 @@ // Initialize PerDimensionIndexingParams static { Map paramsMap = new HashMap(); - paramsMap.put(new CategoryPath("0"), new CategoryListParams(new Term("$Digits", "Zero"))); - paramsMap.put(new CategoryPath("1"), new CategoryListParams(new Term("$Digits", "One"))); - paramsMap.put(new CategoryPath("2"), new CategoryListParams(new Term("$Digits", "Two"))); - paramsMap.put(new CategoryPath("3"), new CategoryListParams(new Term("$Digits", "Three"))); - 
paramsMap.put(new CategoryPath("4"), new CategoryListParams(new Term("$Digits", "Four"))); - paramsMap.put(new CategoryPath("5"), new CategoryListParams(new Term("$Digits", "Five"))); + paramsMap.put(new CategoryPath("0"), new CategoryListParams("$Digits$Zero")); + paramsMap.put(new CategoryPath("1"), new CategoryListParams("$Digits$One")); + paramsMap.put(new CategoryPath("2"), new CategoryListParams("$Digits$Two")); + paramsMap.put(new CategoryPath("3"), new CategoryListParams("$Digits$Three")); + paramsMap.put(new CategoryPath("4"), new CategoryListParams("$Digits$Four")); + paramsMap.put(new CategoryPath("5"), new CategoryListParams("$Digits$Five")); MULTI_IPARAMS = new PerDimensionIndexingParams(paramsMap); } Index: lucene/facet/src/java/org/apache/lucene/facet/associations/AssociationsFacetFields.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/associations/AssociationsFacetFields.java (revision 1433426) +++ lucene/facet/src/java/org/apache/lucene/facet/associations/AssociationsFacetFields.java (working copy) @@ -114,7 +114,7 @@ } @Override - protected FieldType fieldType() { + protected FieldType drillDownFieldType() { return DRILL_DOWN_TYPE; } Index: lucene/facet/src/java/org/apache/lucene/facet/associations/AssociationsIterator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/associations/AssociationsIterator.java (revision 0) +++ lucene/facet/src/java/org/apache/lucene/facet/associations/AssociationsIterator.java (working copy) @@ -0,0 +1,98 @@ +package org.apache.lucene.facet.associations; + +import java.io.IOException; + +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.DocValues.Source; +import org.apache.lucene.store.ByteArrayDataInput; +import org.apache.lucene.util.BytesRef; + +/* + * Licensed to the Apache Software Foundation 
(ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * An iterator over a document's category associations. + * + * @lucene.experimental + */ +public abstract class AssociationsIterator { + + private final T association; + private final String dvField; + private final boolean useDirectSource; + private final BytesRef bytes = new BytesRef(32); + + private DocValues.Source current; + + /** + * Construct a new associations iterator. The given + * {@link CategoryAssociation} is used to deserialize the association values. + * It is assumed that all association values can be deserialized with the + * given {@link CategoryAssociation}. + * + *

+ * NOTE: if {@code useDirectSource} is {@code false}, then a + * {@link DocValues#getSource()} is used, which is an in-memory {@link Source}. + */ + public AssociationsIterator(String field, T association, boolean useDirectSource) throws IOException { + this.association = association; + this.dvField = field + association.getCategoryListID(); + this.useDirectSource = useDirectSource; + } + + /** + * Sets the {@link AtomicReaderContext} for which {@link #setNextDoc(int)} + * calls will be made. Returns true iff this reader has associations for any + * of the documents belonging to the association given to the constructor. + */ + public final boolean setNextReader(AtomicReaderContext context) throws IOException { + DocValues dv = context.reader().docValues(dvField); + if (dv == null) { + current = null; + return false; + } + + current = useDirectSource ? dv.getDirectSource() : dv.getSource(); + return true; + } + + /** + * Skip to the requested document. Returns true iff the document has category + * association values and they were read successfully. Associations are + * handled through {@link #handleAssociation(int, CategoryAssociation)} by + * extending classes. + */ + protected final boolean setNextDoc(int docID) throws IOException { + current.getBytes(docID, bytes); + if (bytes.length == 0) { + return false; // no associations for the requested document + } + + ByteArrayDataInput in = new ByteArrayDataInput(bytes.bytes, bytes.offset, bytes.length); + while (!in.eof()) { + int ordinal = in.readInt(); + association.deserialize(in); + handleAssociation(ordinal, association); + } + return true; + } + + /** A hook for extending classes to handle the given association value for the ordinal. 
*/ + protected abstract void handleAssociation(int ordinal, T association); + +} Property changes on: lucene/facet/src/java/org/apache/lucene/facet/associations/AssociationsIterator.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/facet/src/java/org/apache/lucene/facet/associations/AssociationsPayloadIterator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/associations/AssociationsPayloadIterator.java (revision 1433426) +++ lucene/facet/src/java/org/apache/lucene/facet/associations/AssociationsPayloadIterator.java (working copy) @@ -1,92 +0,0 @@ -package org.apache.lucene.facet.associations; - -import java.io.IOException; - -import org.apache.lucene.facet.search.PayloadIterator; -import org.apache.lucene.index.AtomicReaderContext; -import org.apache.lucene.index.Term; -import org.apache.lucene.store.ByteArrayDataInput; -import org.apache.lucene.util.BytesRef; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * An iterator over a document's category associations. 
- * - * @lucene.experimental - */ -public abstract class AssociationsPayloadIterator { - - private final PayloadIterator pi; - private final T association; - - /** - * Marking whether there are associations (at all) in the given index - */ - private boolean hasAssociations = false; - - /** - * Construct a new associations iterator. The given - * {@link CategoryAssociation} is used to deserialize the association values. - * It is assumed that all association values can be deserialized with the - * given {@link CategoryAssociation}. - */ - public AssociationsPayloadIterator(String field, T association) throws IOException { - pi = new PayloadIterator(new Term(field, association.getCategoryListID())); - this.association = association; - } - - /** - * Sets the {@link AtomicReaderContext} for which {@link #setNextDoc(int)} - * calls will be made. Returns true iff this reader has associations for any - * of the documents belonging to the association given to the constructor. - */ - public final boolean setNextReader(AtomicReaderContext context) throws IOException { - hasAssociations = pi.setNextReader(context); - return hasAssociations; - } - - /** - * Skip to the requested document. Returns true iff the document has category - * association values and they were read successfully. Associations are - * handled through {@link #handleAssociation(int, CategoryAssociation)} by - * extending classes. 
- */ - protected final boolean setNextDoc(int docID) throws IOException { - if (!hasAssociations) { // there are no associations at all - return false; - } - - BytesRef bytes = pi.getPayload(docID); - if (bytes == null) { // no associations for the requested document - return false; - } - - ByteArrayDataInput in = new ByteArrayDataInput(bytes.bytes, bytes.offset, bytes.length); - while (!in.eof()) { - int ordinal = in.readInt(); - association.deserialize(in); - handleAssociation(ordinal, association); - } - return true; - } - - /** A hook for extending classes to handle the given association value for the ordinal. */ - protected abstract void handleAssociation(int ordinal, T association); - -} Index: lucene/facet/src/java/org/apache/lucene/facet/associations/FloatAssociationsIterator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/associations/FloatAssociationsIterator.java (revision 0) +++ lucene/facet/src/java/org/apache/lucene/facet/associations/FloatAssociationsIterator.java (working copy) @@ -0,0 +1,69 @@ +package org.apache.lucene.facet.associations; + +import java.io.IOException; + +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.DocValues.Source; +import org.apache.lucene.util.collections.IntToFloatMap; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * An {@link AssociationsIterator} over float association values. + * + * @lucene.experimental + */ +public class FloatAssociationsIterator extends AssociationsIterator { + + private final IntToFloatMap ordinalAssociations = new IntToFloatMap(); + + /** + * Constructs a new {@link FloatAssociationsIterator} which uses an + * in-memory {@link DocValues#getSource() DocValues source}. + */ + public FloatAssociationsIterator(String field, CategoryFloatAssociation association) throws IOException { + this(field, association, false); + } + + /** + * Constructs a new {@link FloatAssociationsIterator} which uses a + * {@link DocValues} {@link Source} per {@code useDirectSource}. + */ + public FloatAssociationsIterator(String field, CategoryFloatAssociation association, boolean useDirectSource) + throws IOException { + super(field, association, useDirectSource); + } + + @Override + protected void handleAssociation(int ordinal, CategoryFloatAssociation association) { + ordinalAssociations.put(ordinal, association.getValue()); + } + + /** + * Returns the float association values of the categories that are associated + * with the given document, or {@code null} if the document has no + * associations. + *

+ * NOTE: you are not expected to modify the returned map. + */ + public IntToFloatMap getAssociations(int docID) throws IOException { + ordinalAssociations.clear(); + return setNextDoc(docID) ? ordinalAssociations : null; + } + +} Property changes on: lucene/facet/src/java/org/apache/lucene/facet/associations/FloatAssociationsIterator.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/facet/src/java/org/apache/lucene/facet/associations/FloatAssociationsPayloadIterator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/associations/FloatAssociationsPayloadIterator.java (revision 1433426) +++ lucene/facet/src/java/org/apache/lucene/facet/associations/FloatAssociationsPayloadIterator.java (working copy) @@ -1,54 +0,0 @@ -package org.apache.lucene.facet.associations; - -import java.io.IOException; - -import org.apache.lucene.util.collections.IntToFloatMap; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * An {@link AssociationsPayloadIterator} over integer association values. 
- * - * @lucene.experimental - */ -public class FloatAssociationsPayloadIterator extends AssociationsPayloadIterator { - - private final IntToFloatMap ordinalAssociations = new IntToFloatMap(); - - public FloatAssociationsPayloadIterator(String field, CategoryFloatAssociation association) throws IOException { - super(field, association); - } - - @Override - protected void handleAssociation(int ordinal, CategoryFloatAssociation association) { - ordinalAssociations.put(ordinal, association.getValue()); - } - - /** - * Returns the float association values of the categories that are associated - * with the given document, or {@code null} if the document has no - * associations. - *

- * NOTE: you are not expected to modify the returned map. - */ - public IntToFloatMap getAssociations(int docID) throws IOException { - ordinalAssociations.clear(); - return setNextDoc(docID) ? ordinalAssociations : null; - } - -} Index: lucene/facet/src/java/org/apache/lucene/facet/associations/IntAssociationsIterator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/associations/IntAssociationsIterator.java (revision 0) +++ lucene/facet/src/java/org/apache/lucene/facet/associations/IntAssociationsIterator.java (working copy) @@ -0,0 +1,69 @@ +package org.apache.lucene.facet.associations; + +import java.io.IOException; + +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.DocValues.Source; +import org.apache.lucene.util.collections.IntToIntMap; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * An {@link AssociationsIterator} over integer association values. 
+ * + * @lucene.experimental + */ +public class IntAssociationsIterator extends AssociationsIterator { + + private final IntToIntMap ordinalAssociations = new IntToIntMap(); + + /** + * Constructs a new {@link IntAssociationsIterator} which uses an + * in-memory {@link DocValues#getSource() DocValues source}. + */ + public IntAssociationsIterator(String field, CategoryIntAssociation association) throws IOException { + this(field, association, false); + } + + /** + * Constructs a new {@link IntAssociationsIterator} which uses a + * {@link DocValues} {@link Source} per {@code useDirectSource}. + */ + public IntAssociationsIterator(String field, CategoryIntAssociation association, boolean useDirectSource) + throws IOException { + super(field, association, useDirectSource); + } + + @Override + protected void handleAssociation(int ordinal, CategoryIntAssociation association) { + ordinalAssociations.put(ordinal, association.getValue()); + } + + /** + * Returns the integer association values of the categories that are + * associated with the given document, or {@code null} if the document has no + * associations. + *

+ * NOTE: you are not expected to modify the returned map. + */ + public IntToIntMap getAssociations(int docID) throws IOException { + ordinalAssociations.clear(); + return setNextDoc(docID) ? ordinalAssociations : null; + } + +} Property changes on: lucene/facet/src/java/org/apache/lucene/facet/associations/IntAssociationsIterator.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/facet/src/java/org/apache/lucene/facet/associations/IntAssociationsPayloadIterator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/associations/IntAssociationsPayloadIterator.java (revision 1433426) +++ lucene/facet/src/java/org/apache/lucene/facet/associations/IntAssociationsPayloadIterator.java (working copy) @@ -1,54 +0,0 @@ -package org.apache.lucene.facet.associations; - -import java.io.IOException; - -import org.apache.lucene.util.collections.IntToIntMap; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * An {@link AssociationsPayloadIterator} over integer association values. 
- * - * @lucene.experimental - */ -public class IntAssociationsPayloadIterator extends AssociationsPayloadIterator { - - private final IntToIntMap ordinalAssociations = new IntToIntMap(); - - public IntAssociationsPayloadIterator(String field, CategoryIntAssociation association) throws IOException { - super(field, association); - } - - @Override - protected void handleAssociation(int ordinal, CategoryIntAssociation association) { - ordinalAssociations.put(ordinal, association.getValue()); - } - - /** - * Returns the integer association values of the categories that are - * associated with the given document, or {@code null} if the document has no - * associations. - *

- * NOTE: you are not expected to modify the returned map. - */ - public IntToIntMap getAssociations(int docID) throws IOException { - ordinalAssociations.clear(); - return setNextDoc(docID) ? ordinalAssociations : null; - } - -} Index: lucene/facet/src/java/org/apache/lucene/facet/index/CountingListBuilder.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/index/CountingListBuilder.java (revision 1433426) +++ lucene/facet/src/java/org/apache/lucene/facet/index/CountingListBuilder.java (working copy) @@ -56,10 +56,9 @@ private static final class NoPartitionsOrdinalsEncoder extends OrdinalsEncoder { private final IntEncoder encoder; - private final String name; + private final String name = ""; NoPartitionsOrdinalsEncoder(CategoryListParams categoryListParams) { - name = categoryListParams.getTerm().text(); encoder = categoryListParams.createEncoder(); } Index: lucene/facet/src/java/org/apache/lucene/facet/index/FacetFields.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/index/FacetFields.java (revision 1433426) +++ lucene/facet/src/java/org/apache/lucene/facet/index/FacetFields.java (working copy) @@ -4,17 +4,14 @@ import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; -import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Map.Entry; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.StraightBytesDocValuesField; import org.apache.lucene.document.TextField; import org.apache.lucene.facet.index.params.CategoryListParams; import 
org.apache.lucene.facet.index.params.FacetIndexingParams; @@ -51,32 +48,6 @@ */ public class FacetFields { - // a TokenStream for writing the counting list payload - private static final class CountingListStream extends TokenStream { - private final PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class); - private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); - private Iterator> categoriesData; - - CountingListStream() {} - - @Override - public boolean incrementToken() throws IOException { - if (!categoriesData.hasNext()) { - return false; - } - - Entry entry = categoriesData.next(); - termAtt.setEmpty().append(entry.getKey()); - payloadAtt.setPayload(entry.getValue()); - return true; - } - - void setCategoriesData(Map categoriesData) { - this.categoriesData = categoriesData.entrySet().iterator(); - } - - } - // The counting list is written in a payload, but we don't store it // nor need norms. private static final FieldType COUNTING_LIST_PAYLOAD_TYPE = new FieldType(); @@ -94,9 +65,7 @@ // Therefore we set its IndexOptions to DOCS_ONLY. private static final FieldType DRILL_DOWN_TYPE = new FieldType(TextField.TYPE_NOT_STORED); static { - // TODO: once we cutover to DocValues, we can set it to DOCS_ONLY for this - // FacetFields (not associations) - DRILL_DOWN_TYPE.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); + DRILL_DOWN_TYPE.setIndexOptions(IndexOptions.DOCS_ONLY); DRILL_DOWN_TYPE.freeze(); } @@ -175,10 +144,20 @@ * Returns the {@link FieldType} with which the drill-down terms should be * indexed. The default is {@link IndexOptions#DOCS_ONLY}. */ - protected FieldType fieldType() { + protected FieldType drillDownFieldType() { return DRILL_DOWN_TYPE; } + /** + * Add the counting list data to the document under the given field. Note that + * the field is determined by the {@link CategoryListParams}. 
+ */ + protected void addCountingListData(Document doc, Map categoriesData, String field) { + for (Entry entry : categoriesData.entrySet()) { + doc.add(new StraightBytesDocValuesField(field + entry.getKey(), entry.getValue())); + } + } + /** Adds the needed facet fields to the document. */ public void addFields(Document doc, Iterable categories) throws IOException { if (categories == null) { @@ -198,7 +177,7 @@ IntsRef ordinals = new IntsRef(32); // should be enough for most common applications for (Entry> e : categoryLists.entrySet()) { final CategoryListParams clp = e.getKey(); - final String field = clp.getTerm().field(); + final String field = clp.field; // build category list data ordinals.length = 0; // reset @@ -214,13 +193,11 @@ Map categoriesData = getCategoryListData(clp, ordinals, e.getValue()); // add the counting list data - CountingListStream ts = new CountingListStream(); - ts.setCategoriesData(categoriesData); - doc.add(new Field(field, ts, COUNTING_LIST_PAYLOAD_TYPE)); + addCountingListData(doc, categoriesData, field); // add the drill-down field DrillDownStream drillDownStream = getDrillDownStream(e.getValue()); - Field drillDown = new Field(field, drillDownStream, fieldType()); + Field drillDown = new Field(field, drillDownStream, drillDownFieldType()); doc.add(drillDown); } } Index: lucene/facet/src/java/org/apache/lucene/facet/index/OrdinalMappingAtomicReader.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/index/OrdinalMappingAtomicReader.java (revision 1433426) +++ lucene/facet/src/java/org/apache/lucene/facet/index/OrdinalMappingAtomicReader.java (working copy) @@ -25,13 +25,10 @@ import org.apache.lucene.facet.index.params.FacetIndexingParams; import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.OrdinalMap; import org.apache.lucene.index.AtomicReader; -import org.apache.lucene.index.DocsAndPositionsEnum; -import org.apache.lucene.index.Fields; 
+import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.DocValues.Source; +import org.apache.lucene.index.DocValues.Type; import org.apache.lucene.index.FilterAtomicReader; -import org.apache.lucene.index.Term; -import org.apache.lucene.index.Terms; -import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IntsRef; import org.apache.lucene.util.encoding.IntDecoder; @@ -41,8 +38,8 @@ * A {@link FilterAtomicReader} for updating facets ordinal references, * based on an ordinal map. You should use this code in conjunction with merging * taxonomies - after you merge taxonomies, you receive an {@link OrdinalMap} - * which maps the 'old' payloads to the 'new' ones. You can use that map to - * re-map the payloads which contain the facets information (ordinals) either + * which maps the 'old' ordinals to the 'new' ones. You can use that map to + * re-map the doc values which contain the facets information (ordinals) either * before or while merging the indexes. *

* For re-mapping the ordinals during index merge, do the following: @@ -70,10 +67,9 @@ public class OrdinalMappingAtomicReader extends FilterAtomicReader { private final int[] ordinalMap; - // a little obtuse: but we dont need to create Term objects this way - private final Map> termMap = - new HashMap>(1); + private final Map dvFieldMap = new HashMap(); + /** * Wraps an AtomicReader, mapping ordinals according to the ordinalMap. * Calls {@link #OrdinalMappingAtomicReader(AtomicReader, int[], FacetIndexingParams) @@ -91,125 +87,85 @@ super(in); this.ordinalMap = ordinalMap; for (CategoryListParams params: indexingParams.getAllCategoryListParams()) { - Term term = params.getTerm(); - Map fieldMap = termMap.get(term.field()); - if (fieldMap == null) { - fieldMap = new HashMap(1); - termMap.put(term.field(), fieldMap); - } - fieldMap.put(term.bytes(), params); + dvFieldMap.put(params.field, params); } } @Override - public Fields getTermVectors(int docID) throws IOException { - Fields fields = super.getTermVectors(docID); - if (fields == null) { - return null; - } else { - return new OrdinalMappingFields(fields); + public DocValues docValues(String field) throws IOException { + DocValues inner = super.docValues(field); + if (inner == null) { + return inner; } - } - - @Override - public Fields fields() throws IOException { - Fields fields = super.fields(); - if (fields == null) { - return null; + + CategoryListParams clp = dvFieldMap.get(field); + if (clp == null) { + return inner; } else { - return new OrdinalMappingFields(fields); + return new OrdinalMappingDocValues(inner, clp); } } - private class OrdinalMappingFields extends FilterFields { + private class OrdinalMappingDocValues extends DocValues { - public OrdinalMappingFields(Fields in) { - super(in); + private final CategoryListParams clp; + private final DocValues delegate; + + public OrdinalMappingDocValues(DocValues delegate, CategoryListParams clp) { + this.delegate = delegate; + this.clp = clp; } @Override 
- public Terms terms(String field) throws IOException { - Terms terms = super.terms(field); - if (terms == null) { - return terms; - } - Map termsMap = termMap.get(field); - if (termsMap == null) { - return terms; - } else { - return new OrdinalMappingTerms(terms, termsMap); - } + protected Source loadSource() throws IOException { + return new OrdinalMappingSource(getType(), clp, delegate.getSource()); } - } - - private class OrdinalMappingTerms extends FilterTerms { - private final Map termsMap; - - public OrdinalMappingTerms(Terms in, Map termsMap) { - super(in); - this.termsMap = termsMap; - } @Override - public TermsEnum iterator(TermsEnum reuse) throws IOException { - // TODO: should we reuse the inner termsenum? - return new OrdinalMappingTermsEnum(super.iterator(reuse), termsMap); + protected Source loadDirectSource() throws IOException { + return new OrdinalMappingSource(getType(), clp, delegate.getDirectSource()); } - } - - private class OrdinalMappingTermsEnum extends FilterTermsEnum { - private final Map termsMap; - - public OrdinalMappingTermsEnum(TermsEnum in, Map termsMap) { - super(in); - this.termsMap = termsMap; - } @Override - public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException { - // TODO: we could reuse our D&P enum if we need - DocsAndPositionsEnum inner = super.docsAndPositions(liveDocs, reuse, flags); - if (inner == null) { - return inner; - } - - CategoryListParams params = termsMap.get(term()); - if (params == null) { - return inner; - } - - return new OrdinalMappingDocsAndPositionsEnum(inner, params); + public Type getType() { + return Type.BYTES_VAR_STRAIGHT; } + } - private class OrdinalMappingDocsAndPositionsEnum extends FilterDocsAndPositionsEnum { + private class OrdinalMappingSource extends Source { + private final IntEncoder encoder; private final IntDecoder decoder; private final IntsRef ordinals = new IntsRef(32); - private final BytesRef payloadOut = new 
BytesRef(); - - public OrdinalMappingDocsAndPositionsEnum(DocsAndPositionsEnum in, CategoryListParams params) { - super(in); - encoder = params.createEncoder(); + private final Source delegate; + + protected OrdinalMappingSource(Type type, CategoryListParams clp, Source delegate) { + super(type); + this.delegate = delegate; + encoder = clp.createEncoder(); decoder = encoder.createMatchingDecoder(); } - + + @SuppressWarnings("synthetic-access") @Override - public BytesRef getPayload() throws IOException { - BytesRef payload = super.getPayload(); - if (payload == null) { - return payload; + public BytesRef getBytes(int docID, BytesRef ref) { + ref = delegate.getBytes(docID, ref); + if (ref == null || ref.length == 0) { + return ref; } else { - decoder.decode(payload, ordinals); + decoder.decode(ref, ordinals); // map the ordinals for (int i = 0; i < ordinals.length; i++) { ordinals.ints[i] = ordinalMap[ordinals.ints[i]]; } - encoder.encode(ordinals, payloadOut); - return payloadOut; + encoder.encode(ordinals, ref); + return ref; } } + } + } Index: lucene/facet/src/java/org/apache/lucene/facet/index/params/CategoryListParams.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/index/params/CategoryListParams.java (revision 1433426) +++ lucene/facet/src/java/org/apache/lucene/facet/index/params/CategoryListParams.java (working copy) @@ -4,9 +4,8 @@ import java.io.Serializable; import org.apache.lucene.facet.search.CategoryListIterator; -import org.apache.lucene.facet.search.PayloadCategoryListIteraor; +import org.apache.lucene.facet.search.DocValuesCategoryListIterator; import org.apache.lucene.facet.util.PartitionsUtils; -import org.apache.lucene.index.Term; import org.apache.lucene.util.encoding.DGapIntEncoder; import org.apache.lucene.util.encoding.IntDecoder; import org.apache.lucene.util.encoding.IntEncoder; @@ -38,39 +37,26 @@ */ public class CategoryListParams implements Serializable { - /** 
The default term used to store the facets information. */ - public static final Term DEFAULT_TERM = new Term("$facets", "$fulltree$"); + /** The default field used to store the facets information. */ + public static final String DEFAULT_FIELD = "$facets"; - private final Term term; + public final String field; private final int hashCode; - /** - * Constructs a default category list parameters object, using - * {@link #DEFAULT_TERM}. - */ + /** Constructs a default category list parameters object, using {@link #DEFAULT_FIELD}. */ public CategoryListParams() { - this(DEFAULT_TERM); + this(DEFAULT_FIELD); } - /** - * Constructs a category list parameters object, using the given {@link Term}. - * @param term who's payload hold the category-list. - */ - public CategoryListParams(Term term) { - this.term = term; + /** Constructs a category list parameters object, using the given field. */ + public CategoryListParams(String field) { + this.field = field; // Pre-compute the hashCode because these objects are immutable. Saves // some time on the comparisons later. - this.hashCode = term.hashCode(); + this.hashCode = field.hashCode(); } - /** - * A {@link Term} who's payload holds the category-list. - */ - public final Term getTerm() { - return term; - } - /** * Allows to override how categories are encoded and decoded. A matching * {@link IntDecoder} is provided by the {@link IntEncoder}. @@ -110,7 +96,7 @@ // The above hashcodes might equal each other in the case of a collision, // so at this point only directly term equality testing will settle // the equality test. - return this.term.equals(other.term); + return field.equals(other.field); } @Override @@ -121,8 +107,8 @@ /** Create the {@link CategoryListIterator} for the specified partition. 
*/ public CategoryListIterator createCategoryListIterator(int partition) throws IOException { String categoryListTermStr = PartitionsUtils.partitionName(this, partition); - Term payloadTerm = new Term(term.field(), categoryListTermStr); - return new PayloadCategoryListIteraor(payloadTerm, createEncoder().createMatchingDecoder()); + String docValuesField = field + categoryListTermStr; + return new DocValuesCategoryListIterator(docValuesField, createEncoder().createMatchingDecoder()); } } \ No newline at end of file Index: lucene/facet/src/java/org/apache/lucene/facet/search/DocValuesCategoryListIterator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/DocValuesCategoryListIterator.java (revision 0) +++ lucene/facet/src/java/org/apache/lucene/facet/search/DocValuesCategoryListIterator.java (working copy) @@ -0,0 +1,105 @@ +package org.apache.lucene.facet.search; + +import java.io.IOException; + +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.DocValues.Source; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.IntsRef; +import org.apache.lucene.util.encoding.IntDecoder; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** A {@link CategoryListIterator} which reads the ordinals from a {@link DocValues}. */ +public class DocValuesCategoryListIterator implements CategoryListIterator { + + private final IntDecoder decoder; + private final String field; + private final int hashCode; + private final boolean useDirectSource; + private final BytesRef bytes = new BytesRef(32); + + private DocValues.Source current; + + /** + * Constructs a new {@link DocValuesCategoryListIterator} which uses an + * in-memory {@link Source}. + */ + public DocValuesCategoryListIterator(String field, IntDecoder decoder) { + this(field, decoder, false); + } + + /** + * Constructs a new {@link DocValuesCategoryListIterator} which uses either a + * {@link DocValues#getDirectSource() direct source} or + * {@link DocValues#getSource() in-memory} one. + */ + public DocValuesCategoryListIterator(String field, IntDecoder decoder, boolean useDirectSource) { + this.field = field; + this.decoder = decoder; + this.hashCode = field.hashCode(); + this.useDirectSource = useDirectSource; + } + + @Override + public int hashCode() { + return hashCode; + } + + @Override + public boolean equals(Object o) { + if (!(o instanceof DocValuesCategoryListIterator)) { + return false; + } + DocValuesCategoryListIterator other = (DocValuesCategoryListIterator) o; + if (hashCode != other.hashCode) { + return false; + } + + // Hash codes are the same, check equals() to avoid cases of hash-collisions. + return field.equals(other.field); + } + + @Override + public boolean setNextReader(AtomicReaderContext context) throws IOException { + DocValues dv = context.reader().docValues(field); + if (dv == null) { + current = null; + return false; + } + + current = useDirectSource ? 
dv.getDirectSource() : dv.getSource(); + return true; + } + + @Override + public void getOrdinals(int docID, IntsRef ints) throws IOException { + current.getBytes(docID, bytes); + ints.length = 0; + if (bytes.length > 0) { + decoder.decode(bytes, ints); + } + } + + @Override + public String toString() { + return field; + } + +} Property changes on: lucene/facet/src/java/org/apache/lucene/facet/search/DocValuesCategoryListIterator.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/facet/src/java/org/apache/lucene/facet/search/DrillDown.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/DrillDown.java (revision 1433426) +++ lucene/facet/src/java/org/apache/lucene/facet/search/DrillDown.java (working copy) @@ -55,7 +55,7 @@ CategoryListParams clp = iParams.getCategoryListParams(path); char[] buffer = new char[path.fullPathLength()]; iParams.drillDownTermText(path, buffer); - return new Term(clp.getTerm().field(), String.valueOf(buffer)); + return new Term(clp.field, String.valueOf(buffer)); } /** Index: lucene/facet/src/java/org/apache/lucene/facet/search/PayloadCategoryListIteraor.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/PayloadCategoryListIteraor.java (revision 1433426) +++ lucene/facet/src/java/org/apache/lucene/facet/search/PayloadCategoryListIteraor.java (working copy) @@ -1,81 +0,0 @@ -package org.apache.lucene.facet.search; - -import java.io.IOException; - -import org.apache.lucene.index.AtomicReaderContext; -import org.apache.lucene.index.Term; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.IntsRef; -import org.apache.lucene.util.encoding.IntDecoder; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * A {@link CategoryListIterator} which reads the category ordinals from a - * payload. - * - * @lucene.experimental - */ -public class PayloadCategoryListIteraor implements CategoryListIterator { - - private final IntDecoder decoder; - private final Term term; - private final PayloadIterator pi; - private final int hashCode; - - public PayloadCategoryListIteraor(Term term, IntDecoder decoder) throws IOException { - pi = new PayloadIterator(term); - this.decoder = decoder; - hashCode = term.hashCode(); - this.term = term; - } - - @Override - public boolean equals(Object other) { - if (!(other instanceof PayloadCategoryListIteraor)) { - return false; - } - PayloadCategoryListIteraor that = (PayloadCategoryListIteraor) other; - if (hashCode != that.hashCode) { - return false; - } - - // Hash codes are the same, check equals() to avoid cases of hash-collisions. 
- return term.equals(that.term); - } - - @Override - public int hashCode() { - return hashCode; - } - - @Override - public boolean setNextReader(AtomicReaderContext context) throws IOException { - return pi.setNextReader(context); - } - - @Override - public void getOrdinals(int docID, IntsRef ints) throws IOException { - ints.length = 0; - BytesRef payload = pi.getPayload(docID); - if (payload != null) { - decoder.decode(payload, ints); - } - } - -} Index: lucene/facet/src/java/org/apache/lucene/facet/search/PayloadIterator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/PayloadIterator.java (revision 1433426) +++ lucene/facet/src/java/org/apache/lucene/facet/search/PayloadIterator.java (working copy) @@ -1,114 +0,0 @@ -package org.apache.lucene.facet.search; - -import java.io.IOException; - -import org.apache.lucene.index.AtomicReaderContext; -import org.apache.lucene.index.DocsAndPositionsEnum; -import org.apache.lucene.index.Fields; -import org.apache.lucene.index.Term; -import org.apache.lucene.index.Terms; -import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.util.BytesRef; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * A utility class for iterating through a posting list of a given term and - * retrieving the payload of the first position in every document. For - * efficiency, this class does not check if documents passed to - * {@link #getPayload(int)} are deleted, since it is usually used to iterate on - * payloads of documents that matched a query. If you need to skip over deleted - * documents, you should do so before calling {@link #getPayload(int)}. - * - * @lucene.experimental - */ -public class PayloadIterator { - - private TermsEnum reuseTE; - private DocsAndPositionsEnum dpe; - private boolean hasMore; - private int curDocID; - - private final Term term; - - public PayloadIterator(Term term) throws IOException { - this.term = term; - } - - /** - * Sets the {@link AtomicReaderContext} for which {@link #getPayload(int)} - * calls will be made. Returns true iff this reader has payload for any of the - * documents belonging to the {@link Term} given to the constructor. - */ - public boolean setNextReader(AtomicReaderContext context) throws IOException { - hasMore = false; - Fields fields = context.reader().fields(); - if (fields != null) { - Terms terms = fields.terms(term.field()); - if (terms != null) { - reuseTE = terms.iterator(reuseTE); - if (reuseTE.seekExact(term.bytes(), true)) { - // this class is usually used to iterate on whatever a Query matched - // if it didn't match deleted documents, we won't receive them. 
if it - // did, we should iterate on them too, therefore we pass liveDocs=null - dpe = reuseTE.docsAndPositions(null, dpe, DocsAndPositionsEnum.FLAG_PAYLOADS); - if (dpe != null && (curDocID = dpe.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { - hasMore = true; - } - } - } - } - return hasMore; - } - - /** - * Returns the {@link BytesRef payload} of the given document, or {@code null} - * if the document does not exist, there are no more documents in the posting - * list, or the document exists but has not payload. The given document IDs - * are treated as local to the reader given to - * {@link #setNextReader(AtomicReaderContext)}. - */ - public BytesRef getPayload(int docID) throws IOException { - if (!hasMore) { - return null; - } - - if (curDocID > docID) { - // document does not exist - return null; - } - - if (curDocID < docID) { - curDocID = dpe.advance(docID); - if (curDocID != docID) { // requested document does not have a payload - if (curDocID == DocIdSetIterator.NO_MORE_DOCS) { // no more docs in this reader - hasMore = false; - } - return null; - } - } - - // we're on the document - assert dpe.freq() == 1 : "expecting freq=1 (got " + dpe.freq() + ") term=" + term + " doc=" + curDocID; - int pos = dpe.nextPosition(); - assert pos != -1 : "no positions for term=" + term + " doc=" + curDocID; - return dpe.getPayload(); - } - -} Index: lucene/facet/src/java/org/apache/lucene/facet/search/StandardFacetsAccumulator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/StandardFacetsAccumulator.java (revision 1433426) +++ lucene/facet/src/java/org/apache/lucene/facet/search/StandardFacetsAccumulator.java (working copy) @@ -242,20 +242,29 @@ int maxDoc = -1; while (iterator.next()) { int docID = iterator.getDocID(); - while (docID >= maxDoc) { // find the segment which contains this document - if (!contexts.hasNext()) { - throw new RuntimeException("ScoredDocIDs contains documents 
outside this reader's segments !?"); - } - current = contexts.next(); - maxDoc = current.docBase + current.reader().maxDoc(); - if (docID < maxDoc) { // segment has docs, check if it has categories - boolean validSegment = categoryListIter.setNextReader(current); - validSegment &= aggregator.setNextReader(current); - if (!validSegment) { // if categoryList or aggregtor say it's an invalid segment, skip all docs - while (docID < maxDoc && iterator.next()) { - docID = iterator.getDocID(); + if (docID >= maxDoc) { + boolean iteratorDone = false; + do { // find the segment which contains this document + if (!contexts.hasNext()) { + throw new RuntimeException("ScoredDocIDs contains documents outside this reader's segments !?"); + } + current = contexts.next(); + maxDoc = current.docBase + current.reader().maxDoc(); + if (docID < maxDoc) { // segment has docs, check if it has categories + boolean validSegment = categoryListIter.setNextReader(current); + validSegment &= aggregator.setNextReader(current); + if (!validSegment) { // if categoryList or aggregtor say it's an invalid segment, skip all docs + while (docID < maxDoc && iterator.next()) { + docID = iterator.getDocID(); + } + if (docID < maxDoc) { + iteratorDone = true; + } } } + } while (docID >= maxDoc); + if (iteratorDone) { // iterator finished, terminate the loop + break; } } docID -= current.docBase; Index: lucene/facet/src/java/org/apache/lucene/facet/search/aggregator/associations/AssociationFloatSumAggregator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/aggregator/associations/AssociationFloatSumAggregator.java (revision 1433426) +++ lucene/facet/src/java/org/apache/lucene/facet/search/aggregator/associations/AssociationFloatSumAggregator.java (working copy) @@ -3,7 +3,7 @@ import java.io.IOException; import org.apache.lucene.facet.associations.CategoryFloatAssociation; -import 
org.apache.lucene.facet.associations.FloatAssociationsPayloadIterator; +import org.apache.lucene.facet.associations.FloatAssociationsIterator; import org.apache.lucene.facet.index.params.CategoryListParams; import org.apache.lucene.facet.search.aggregator.Aggregator; import org.apache.lucene.index.AtomicReaderContext; @@ -37,15 +37,15 @@ protected final String field; protected final float[] sumArray; - protected final FloatAssociationsPayloadIterator associations; + protected final FloatAssociationsIterator associations; public AssociationFloatSumAggregator(float[] sumArray) throws IOException { - this(CategoryListParams.DEFAULT_TERM.field(), sumArray); + this(CategoryListParams.DEFAULT_FIELD, sumArray); } public AssociationFloatSumAggregator(String field, float[] sumArray) throws IOException { this.field = field; - associations = new FloatAssociationsPayloadIterator(field, new CategoryFloatAssociation()); + associations = new FloatAssociationsIterator(field, new CategoryFloatAssociation()); this.sumArray = sumArray; } Index: lucene/facet/src/java/org/apache/lucene/facet/search/aggregator/associations/AssociationIntSumAggregator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/aggregator/associations/AssociationIntSumAggregator.java (revision 1433426) +++ lucene/facet/src/java/org/apache/lucene/facet/search/aggregator/associations/AssociationIntSumAggregator.java (working copy) @@ -3,7 +3,7 @@ import java.io.IOException; import org.apache.lucene.facet.associations.CategoryIntAssociation; -import org.apache.lucene.facet.associations.IntAssociationsPayloadIterator; +import org.apache.lucene.facet.associations.IntAssociationsIterator; import org.apache.lucene.facet.index.params.CategoryListParams; import org.apache.lucene.facet.search.aggregator.Aggregator; import org.apache.lucene.index.AtomicReaderContext; @@ -37,15 +37,15 @@ protected final String field; protected final int[] sumArray; 
- protected final IntAssociationsPayloadIterator associations; + protected final IntAssociationsIterator associations; public AssociationIntSumAggregator(int[] sumArray) throws IOException { - this(CategoryListParams.DEFAULT_TERM.field(), sumArray); + this(CategoryListParams.DEFAULT_FIELD, sumArray); } public AssociationIntSumAggregator(String field, int[] sumArray) throws IOException { this.field = field; - associations = new IntAssociationsPayloadIterator(field, new CategoryIntAssociation()); + associations = new IntAssociationsIterator(field, new CategoryIntAssociation()); this.sumArray = sumArray; } Index: lucene/facet/src/java/org/apache/lucene/facet/search/cache/CategoryListCache.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/cache/CategoryListCache.java (revision 1433426) +++ lucene/facet/src/java/org/apache/lucene/facet/search/cache/CategoryListCache.java (working copy) @@ -32,6 +32,7 @@ * @lucene.experimental */ public class CategoryListCache { + // nocommit do we still need this class, now that facets are on DocValues? private HashMap cldMap = new HashMap(); Index: lucene/facet/src/java/org/apache/lucene/facet/search/params/associations/AssociationFloatSumFacetRequest.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/params/associations/AssociationFloatSumFacetRequest.java (revision 1433426) +++ lucene/facet/src/java/org/apache/lucene/facet/search/params/associations/AssociationFloatSumFacetRequest.java (working copy) @@ -28,7 +28,10 @@ /** * A {@link FacetRequest} for weighting facets according to their float - * association by summing the association values. + * association by summing the association values. Note that this class caches + * the associations data in-memory by default. 
You can override + * {@link #createAggregator(boolean, FacetArrays, TaxonomyReader)} to return an + * {@link AssociationFloatSumAggregator} which does otherwise. * * @lucene.experimental */ Index: lucene/facet/src/java/org/apache/lucene/facet/search/params/associations/AssociationIntSumFacetRequest.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/params/associations/AssociationIntSumFacetRequest.java (revision 1433426) +++ lucene/facet/src/java/org/apache/lucene/facet/search/params/associations/AssociationIntSumFacetRequest.java (working copy) @@ -4,6 +4,7 @@ import org.apache.lucene.facet.search.FacetArrays; import org.apache.lucene.facet.search.aggregator.Aggregator; +import org.apache.lucene.facet.search.aggregator.associations.AssociationFloatSumAggregator; import org.apache.lucene.facet.search.aggregator.associations.AssociationIntSumAggregator; import org.apache.lucene.facet.search.params.FacetRequest; import org.apache.lucene.facet.taxonomy.CategoryPath; @@ -28,7 +29,10 @@ /** * A {@link FacetRequest} for weighting facets according to their integer - * association by summing the association values. + * association by summing the association values. Note that this class caches + * the associations data in-memory by default. You can override + * {@link #createAggregator(boolean, FacetArrays, TaxonomyReader)} to return an + * {@link AssociationIntSumAggregator} which does otherwise. * * @lucene.experimental */ Index: lucene/facet/src/java/org/apache/lucene/facet/util/PartitionsUtils.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/util/PartitionsUtils.java (revision 1433426) +++ lucene/facet/src/java/org/apache/lucene/facet/util/PartitionsUtils.java (working copy) @@ -28,16 +28,10 @@ */ public final class PartitionsUtils { + /** The prefix that is added to the name of the partition.
*/ + public static final String PART_NAME_PREFIX = "$part"; + /** - * Get the offset for a given partition. That is, what is the minimum number an - * ordinal could be for a particular partition. - */ - public final static int partitionOffset(FacetIndexingParams iParams, - int partitionNumber, final TaxonomyReader taxonomyReader) { - return partitionNumber * partitionSize(iParams, taxonomyReader); - } - - /** * Get the partition size in this parameter, or return the size of the taxonomy, which * is smaller. (Guarantees usage of as little memory as possible at search time). */ @@ -58,21 +52,18 @@ /** * Partition name by category ordinal */ - public final static String partitionNameByOrdinal( - FacetIndexingParams iParams, CategoryListParams clParams, int ordinal) { + public final static String partitionNameByOrdinal(FacetIndexingParams iParams, CategoryListParams clParams, + int ordinal) { int partition = partitionNumber(iParams, ordinal); return partitionName(clParams, partition); } - /** - * Partition name by its number - */ + /** Partition name by its number */ public final static String partitionName(CategoryListParams clParams, int partition) { - String term = clParams.getTerm().text(); if (partition == 0) { - return term; // for backwards compatibility we do not add a partition number in this case + return ""; // for backwards compatibility we do not add a partition number in this case } - return term + partition; + return PART_NAME_PREFIX + Integer.toString(partition); } } Index: lucene/facet/src/test/org/apache/lucene/facet/FacetTestBase.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/FacetTestBase.java (revision 1433426) +++ lucene/facet/src/test/org/apache/lucene/facet/FacetTestBase.java (working copy) @@ -266,13 +266,12 @@ FacetIndexingParams iParams = getFacetIndexingParams(Integer.MAX_VALUE); String delim = String.valueOf(iParams.getFacetDelimChar()); Map res = new HashMap(); - 
HashSet handledTerms = new HashSet(); + HashSet handledTerms = new HashSet(); for (CategoryListParams clp : iParams.getAllCategoryListParams()) { - Term baseTerm = new Term(clp.getTerm().field()); - if (!handledTerms.add(baseTerm)) { + if (!handledTerms.add(clp.field)) { continue; // already handled this term (for another list) } - Terms terms = MultiFields.getTerms(indexReader, baseTerm.field()); + Terms terms = MultiFields.getTerms(indexReader, clp.field); if (terms == null) { continue; } Index: lucene/facet/src/test/org/apache/lucene/facet/FacetTestUtils.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/FacetTestUtils.java (revision 1433426) +++ lucene/facet/src/test/org/apache/lucene/facet/FacetTestUtils.java (working copy) @@ -2,14 +2,9 @@ import java.io.IOException; import java.util.ArrayList; -import java.util.Collections; import java.util.List; import org.apache.lucene.analysis.standard.StandardAnalyzer; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.document.TextField; -import org.apache.lucene.facet.index.FacetFields; import org.apache.lucene.facet.index.params.FacetIndexingParams; import org.apache.lucene.facet.search.FacetsCollector; import org.apache.lucene.facet.search.params.CountFacetRequest; @@ -23,7 +18,6 @@ import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; -import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.search.Collector; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchAllDocsQuery; @@ -116,15 +110,6 @@ return collectors; } - public static void add(FacetIndexingParams iParams, RandomIndexWriter iw, - TaxonomyWriter tw, String... 
strings) throws IOException { - Document d = new Document(); - FacetFields facetFields = new FacetFields(tw, iParams); - facetFields.addFields(d, Collections.singletonList(new CategoryPath(strings))); - d.add(new TextField("content", "alpha", Field.Store.YES)); - iw.addDocument(d); - } - public static class IndexTaxonomyReaderPair { public DirectoryReader indexReader; public DirectoryTaxonomyReader taxReader; Index: lucene/facet/src/test/org/apache/lucene/facet/index/params/CategoryListParamsTest.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/index/params/CategoryListParamsTest.java (revision 1433426) +++ lucene/facet/src/test/org/apache/lucene/facet/index/params/CategoryListParamsTest.java (working copy) @@ -1,11 +1,8 @@ package org.apache.lucene.facet.index.params; -import org.apache.lucene.index.Term; +import org.apache.lucene.util.LuceneTestCase; import org.junit.Test; -import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.facet.index.params.CategoryListParams; - /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. 
See the NOTICE file distributed with @@ -28,7 +25,7 @@ @Test public void testDefaultSettings() { CategoryListParams clp = new CategoryListParams(); - assertEquals("wrong default term", new Term("$facets", "$fulltree$"), clp.getTerm()); + assertEquals("wrong default field", "$facets", clp.field); assertEquals("unexpected default encoder", "Sorting (Unique (DGap (VInt8)))", clp.createEncoder().toString()); assertEquals("unexpected default decoder", "DGap (VInt8)", clp.createEncoder().createMatchingDecoder().toString()); } @@ -64,8 +61,8 @@ clParams1.hashCode(), clParams2.hashCode()); // Test 2 CategoryListParams with the same specified Term - clParams1 = new CategoryListParams(new Term("test")); - clParams2 = new CategoryListParams(new Term("test")); + clParams1 = new CategoryListParams("test"); + clParams2 = new CategoryListParams("test"); assertEquals( "2 CategoryListParams with the same term should equal each other.", clParams1, clParams2); @@ -73,8 +70,8 @@ clParams1.hashCode(), clParams2.hashCode()); // Test 2 CategoryListParams with DIFFERENT terms - clParams1 = new CategoryListParams(new Term("test1")); - clParams2 = new CategoryListParams(new Term("test2")); + clParams1 = new CategoryListParams("test1"); + clParams2 = new CategoryListParams("test2"); assertFalse( "2 CategoryListParams with the different terms should NOT equal each other.", clParams1.equals(clParams2)); Index: lucene/facet/src/test/org/apache/lucene/facet/index/params/FacetIndexingParamsTest.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/index/params/FacetIndexingParamsTest.java (revision 1433426) +++ lucene/facet/src/test/org/apache/lucene/facet/index/params/FacetIndexingParamsTest.java (working copy) @@ -35,8 +35,7 @@ assertNotNull("Missing default category list", dfip.getAllCategoryListParams()); assertEquals("all categories have the same CategoryListParams by default", dfip.getCategoryListParams(null), 
dfip.getCategoryListParams(new CategoryPath("a"))); - assertEquals("Expected default category list term is $facets:$fulltree$", - new Term("$facets", "$fulltree$"), dfip.getCategoryListParams(null).getTerm()); + assertEquals("Expected default category list field is $facets", "$facets", dfip.getCategoryListParams(null).field); String expectedDDText = "a" + dfip.getFacetDelimChar() + "b"; CategoryPath cp = new CategoryPath("a", "b"); @@ -48,13 +47,13 @@ assertEquals("wrong drill-down term text", expectedDDText, new String( buf, 0, numchars)); CategoryListParams clParams = dfip.getCategoryListParams(null); - assertEquals("partition for all ordinals is the first", "$fulltree$", + assertEquals("partition for all ordinals is the first", "", PartitionsUtils.partitionNameByOrdinal(dfip, clParams , 250)); assertEquals("for partition 0, the same name should be returned", - "$fulltree$", PartitionsUtils.partitionName(clParams, 0)); + "", PartitionsUtils.partitionName(clParams, 0)); assertEquals( "for any other, it's the concatenation of name + partition", - "$fulltree$1", PartitionsUtils.partitionName(clParams, 1)); + PartitionsUtils.PART_NAME_PREFIX + "1", PartitionsUtils.partitionName(clParams, 1)); assertEquals("default partition number is always 0", 0, PartitionsUtils.partitionNumber(dfip,100)); assertEquals("default partition size is unbounded", Integer.MAX_VALUE, @@ -63,11 +62,9 @@ @Test public void testCategoryListParamsWithDefaultIndexingParams() { - CategoryListParams clp = new CategoryListParams( - new Term("clp", "value")); + CategoryListParams clp = new CategoryListParams("clp"); FacetIndexingParams dfip = new FacetIndexingParams(clp); - assertEquals("Expected default category list term is " + clp.getTerm(), - clp.getTerm(), dfip.getCategoryListParams(null).getTerm()); + assertEquals("Expected default category list field is " + clp.field, clp.field, dfip.getCategoryListParams(null).field); } @Test Index: 
lucene/facet/src/test/org/apache/lucene/facet/index/params/PerDimensionIndexingParamsTest.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/index/params/PerDimensionIndexingParamsTest.java (revision 1433426) +++ lucene/facet/src/test/org/apache/lucene/facet/index/params/PerDimensionIndexingParamsTest.java (working copy) @@ -32,44 +32,31 @@ public void testTopLevelSettings() { FacetIndexingParams ifip = new PerDimensionIndexingParams(Collections.emptyMap()); assertNotNull("Missing default category list", ifip.getAllCategoryListParams()); - assertEquals( - "Expected default category list term is $facets:$fulltree$", - new Term("$facets", "$fulltree$"), ifip.getCategoryListParams( - null).getTerm()); - String expectedDDText = "a" - + ifip.getFacetDelimChar() + "b"; + assertEquals("Expected default category list field is $facets", "$facets", ifip.getCategoryListParams(null).field); + String expectedDDText = "a" + ifip.getFacetDelimChar() + "b"; CategoryPath cp = new CategoryPath("a", "b"); - assertEquals("wrong drill-down term", new Term("$facets", - expectedDDText), DrillDown.term(ifip,cp)); + assertEquals("wrong drill-down term", new Term("$facets", expectedDDText), DrillDown.term(ifip,cp)); char[] buf = new char[20]; int numchars = ifip.drillDownTermText(cp, buf); assertEquals("3 characters should be written", 3, numchars); - assertEquals("wrong drill-down term text", expectedDDText, new String( - buf, 0, numchars)); + assertEquals("wrong drill-down term text", expectedDDText, new String(buf, 0, numchars)); CategoryListParams clParams = ifip.getCategoryListParams(null); - assertEquals("partition for all ordinals is the first", "$fulltree$", - PartitionsUtils.partitionNameByOrdinal(ifip, clParams , 250)); - assertEquals("for partition 0, the same name should be returned", - "$fulltree$", PartitionsUtils.partitionName(clParams, 0)); - assertEquals( - "for any other, it's the concatenation of name 
+ partition", - "$fulltree$1", PartitionsUtils.partitionName(clParams, 1)); - assertEquals("default partition number is always 0", 0, - PartitionsUtils.partitionNumber(ifip,100)); - - assertEquals("default partition size is unbounded", Integer.MAX_VALUE, - ifip.getPartitionSize()); + assertEquals("partition for all ordinals is the first", "", PartitionsUtils.partitionNameByOrdinal(ifip, clParams , 250)); + assertEquals("for partition 0, the same name should be returned", "", PartitionsUtils.partitionName(clParams, 0)); + assertEquals("for any other, it's the concatenation of name + partition", PartitionsUtils.PART_NAME_PREFIX + "1", PartitionsUtils.partitionName(clParams, 1)); + assertEquals("default partition number is always 0", 0, PartitionsUtils.partitionNumber(ifip,100)); + assertEquals("default partition size is unbounded", Integer.MAX_VALUE, ifip.getPartitionSize()); } @Test public void testCategoryListParamsAddition() { - CategoryListParams clp = new CategoryListParams(new Term("clp", "value")); + CategoryListParams clp = new CategoryListParams("clp"); PerDimensionIndexingParams tlfip = new PerDimensionIndexingParams( Collections. 
singletonMap(new CategoryPath("a"), clp)); - assertEquals("Expected category list term is " + clp.getTerm(), - clp.getTerm(), tlfip.getCategoryListParams(new CategoryPath("a")).getTerm()); - assertNotSame("Unexpected default category list " + clp.getTerm(), clp, tlfip.getCategoryListParams(null)); + assertEquals("Expected category list field is " + clp.field, + clp.field, tlfip.getCategoryListParams(new CategoryPath("a")).field); + assertNotSame("Unexpected default category list " + clp.field, clp, tlfip.getCategoryListParams(null)); } } \ No newline at end of file Index: lucene/facet/src/test/org/apache/lucene/facet/search/CategoryListIteratorTest.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/search/CategoryListIteratorTest.java (revision 1433426) +++ lucene/facet/src/test/org/apache/lucene/facet/search/CategoryListIteratorTest.java (working copy) @@ -1,23 +1,15 @@ package org.apache.lucene.facet.search; -import java.io.IOException; -import java.io.Reader; import java.util.HashSet; import java.util.Set; -import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.MockTokenizer; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.document.TextField; +import org.apache.lucene.document.StraightBytesDocValuesField; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.RandomIndexWriter; -import org.apache.lucene.index.Term; import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IntsRef; @@ -48,42 +40,6 @@ public class CategoryListIteratorTest extends 
LuceneTestCase { - private static final class DataTokenStream extends TokenStream { - - private final PayloadAttribute payload = addAttribute(PayloadAttribute.class); - private final BytesRef buf; - private final IntEncoder encoder; - private final CharTermAttribute term = addAttribute(CharTermAttribute.class); - - private int idx; - private boolean exhausted = false; - - public DataTokenStream(String text, IntEncoder encoder) { - this.encoder = encoder; - term.setEmpty().append(text); - buf = new BytesRef(); - payload.setPayload(buf); - } - - public void setIdx(int idx) { - this.idx = idx; - exhausted = false; - } - - @Override - public boolean incrementToken() throws IOException { - if (exhausted) { - return false; - } - - // must copy because encoders may change the buffer - encoder.encode(IntsRef.deepCopyOf(data[idx]), buf); - exhausted = true; - return true; - } - - } - static final IntsRef[] data = new IntsRef[] { new IntsRef(new int[] { 1, 2 }, 0, 2), new IntsRef(new int[] { 3, 4 }, 0, 2), @@ -95,13 +51,13 @@ public void testPayloadCategoryListIteraor() throws Exception { Directory dir = newDirectory(); final IntEncoder encoder = new SortingIntEncoder(new UniqueValuesIntEncoder(new DGapIntEncoder(new VInt8IntEncoder()))); - DataTokenStream dts = new DataTokenStream("1",encoder); RandomIndexWriter writer = new RandomIndexWriter(random(), dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random(), MockTokenizer.KEYWORD, false)).setMergePolicy(newLogMergePolicy())); + BytesRef buf = new BytesRef(); for (int i = 0; i < data.length; i++) { - dts.setIdx(i); Document doc = new Document(); - doc.add(new TextField("f", dts)); + encoder.encode(IntsRef.deepCopyOf(data[i]), buf); + doc.add(new StraightBytesDocValuesField("f", buf)); writer.addDocument(doc); } IndexReader reader = writer.getReader(); @@ -109,9 +65,9 @@ int totalCategories = 0; IntsRef ordinals = new IntsRef(); - CategoryListIterator cli = new PayloadCategoryListIteraor(new Term("f","1"), 
encoder.createMatchingDecoder()); + CategoryListIterator cli = new DocValuesCategoryListIterator("f", encoder.createMatchingDecoder()); for (AtomicReaderContext context : reader.leaves()) { - cli.setNextReader(context); + assertTrue("failed to initalize iterator", cli.setNextReader(context)); int maxDoc = context.reader().maxDoc(); int dataIdx = context.docBase; for (int doc = 0; doc < maxDoc; doc++, dataIdx++) { @@ -136,24 +92,17 @@ public void testPayloadIteratorWithInvalidDoc() throws Exception { Directory dir = newDirectory(); final IntEncoder encoder = new SortingIntEncoder(new UniqueValuesIntEncoder(new DGapIntEncoder(new VInt8IntEncoder()))); - DataTokenStream dts = new DataTokenStream("1", encoder); - // this test requires that no payloads ever be randomly present! - final Analyzer noPayloadsAnalyzer = new Analyzer() { - @Override - public TokenStreamComponents createComponents(String fieldName, Reader reader) { - return new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.KEYWORD, false)); - } - }; // NOTE: test is wired to LogMP... because test relies on certain docids having payloads RandomIndexWriter writer = new RandomIndexWriter(random(), dir, - newIndexWriterConfig(TEST_VERSION_CURRENT, noPayloadsAnalyzer).setMergePolicy(newLogMergePolicy())); + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy())); for (int i = 0; i < data.length; i++) { Document doc = new Document(); if (i == 0) { - dts.setIdx(i); - doc.add(new TextField("f", dts)); // only doc 0 has payloads! 
+ BytesRef buf = new BytesRef(); + encoder.encode(IntsRef.deepCopyOf(data[i]), buf ); + doc.add(new StraightBytesDocValuesField("f", buf)); } else { - doc.add(new TextField("f", "1", Field.Store.NO)); + doc.add(new StraightBytesDocValuesField("f", new BytesRef())); } writer.addDocument(doc); writer.commit(); @@ -164,9 +113,9 @@ int totalCategories = 0; IntsRef ordinals = new IntsRef(); - CategoryListIterator cli = new PayloadCategoryListIteraor(new Term("f","1"), encoder.createMatchingDecoder()); + CategoryListIterator cli = new DocValuesCategoryListIterator("f", encoder.createMatchingDecoder()); for (AtomicReaderContext context : reader.leaves()) { - cli.setNextReader(context); + assertTrue("failed to initalize iterator", cli.setNextReader(context)); int maxDoc = context.reader().maxDoc(); int dataIdx = context.docBase; for (int doc = 0; doc < maxDoc; doc++, dataIdx++) { @@ -176,13 +125,13 @@ } cli.getOrdinals(doc, ordinals); if (dataIdx == 0) { - assertTrue("document 0 must have a payload", ordinals.length > 0); + assertTrue("document 0 must have ordinals", ordinals.length > 0); for (int j = 0; j < ordinals.length; j++) { assertTrue("expected category not found: " + ordinals.ints[j], values.contains(ordinals.ints[j])); } totalCategories += ordinals.length; } else { - assertTrue("only document 0 should have a payload", ordinals.length == 0); + assertTrue("only document 0 should have ordinals", ordinals.length == 0); } } } Index: lucene/facet/src/test/org/apache/lucene/facet/search/DrillDownTest.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/search/DrillDownTest.java (revision 1433426) +++ lucene/facet/src/test/org/apache/lucene/facet/search/DrillDownTest.java (working copy) @@ -60,8 +60,8 @@ public DrillDownTest() { Map paramsMap = new HashMap(); - paramsMap.put(new CategoryPath("a"), new CategoryListParams(new Term("testing_facets_a", "a"))); - paramsMap.put(new CategoryPath("b"), new 
CategoryListParams(new Term("testing_facets_b", "b"))); + paramsMap.put(new CategoryPath("a"), new CategoryListParams("testing_facets_a")); + paramsMap.put(new CategoryPath("b"), new CategoryListParams("testing_facets_b")); nonDefaultParams = new PerDimensionIndexingParams(paramsMap); } @@ -113,8 +113,8 @@ } @Test - public void testTermDefault() { - String defaultField = CategoryListParams.DEFAULT_TERM.field(); + public void testDefaultField() { + String defaultField = CategoryListParams.DEFAULT_FIELD; Term termA = DrillDown.term(defaultParams, new CategoryPath("a")); assertEquals(new Term(defaultField, "a"), termA); Index: lucene/facet/src/test/org/apache/lucene/facet/search/TestMultipleCategoryLists.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/search/TestMultipleCategoryLists.java (revision 1433426) +++ lucene/facet/src/test/org/apache/lucene/facet/search/TestMultipleCategoryLists.java (working copy) @@ -10,14 +10,18 @@ import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.MockTokenizer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.TextField; import org.apache.lucene.facet.FacetTestUtils; +import org.apache.lucene.facet.index.FacetFields; import org.apache.lucene.facet.index.params.CategoryListParams; import org.apache.lucene.facet.index.params.FacetIndexingParams; import org.apache.lucene.facet.index.params.PerDimensionIndexingParams; import org.apache.lucene.facet.search.params.CountFacetRequest; import org.apache.lucene.facet.search.params.FacetRequest; +import org.apache.lucene.facet.search.params.FacetRequest.ResultMode; import org.apache.lucene.facet.search.params.FacetSearchParams; -import org.apache.lucene.facet.search.params.FacetRequest.ResultMode; import org.apache.lucene.facet.search.results.FacetResult; import org.apache.lucene.facet.search.results.FacetResultNode; 
import org.apache.lucene.facet.taxonomy.CategoryPath; @@ -25,23 +29,19 @@ import org.apache.lucene.facet.taxonomy.TaxonomyWriter; import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader; import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter; -import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.index.AtomicReader; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriterConfig.OpenMode; -import org.apache.lucene.index.MultiFields; import org.apache.lucene.index.RandomIndexWriter; -import org.apache.lucene.index.Term; -import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.MultiCollector; import org.apache.lucene.search.Query; import org.apache.lucene.search.TopScoreDocCollector; import org.apache.lucene.store.Directory; -import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.util._TestUtil; import org.junit.Test; /* @@ -63,6 +63,18 @@ public class TestMultipleCategoryLists extends LuceneTestCase { + private static final CategoryPath[] CATEGORIES = new CategoryPath[] { + new CategoryPath("Author", "Mark Twain"), + new CategoryPath("Author", "Stephen King"), + new CategoryPath("Author", "Kurt Vonnegut"), + new CategoryPath("Band", "Rock & Pop", "The Beatles"), + new CategoryPath("Band", "Punk", "The Ramones"), + new CategoryPath("Band", "Rock & Pop", "U2"), + new CategoryPath("Band", "Rock & Pop", "REM"), + new CategoryPath("Band", "Rock & Pop", "Dave Matthews Band"), + new CategoryPath("Composer", "Bach"), + }; + @Test public void testDefault() throws Exception { Directory[][] dirs = getDirs(); @@ -72,9 +84,6 @@ // create and open a taxonomy writer TaxonomyWriter tw = new DirectoryTaxonomyWriter(dirs[0][1], 
OpenMode.CREATE); - /** - * Configure with no custom counting lists - */ PerDimensionIndexingParams iParams = new PerDimensionIndexingParams(Collections.emptyMap()); seedIndex(iw, tw, iParams); @@ -88,19 +97,14 @@ // prepare searcher to search against IndexSearcher searcher = newSearcher(ir); - FacetsCollector facetsCollector = performSearch(iParams, tr, ir, - searcher); + FacetsCollector facetsCollector = performSearch(iParams, tr, ir, searcher); // Obtain facets results and hand-test them assertCorrectResults(facetsCollector); - DocsEnum td = _TestUtil.docs(random(), ir, "$facets", new BytesRef("$fulltree$"), MultiFields.getLiveDocs(ir), null, DocsEnum.FLAG_NONE); - assertTrue(td.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); + assertOrdinalsExist("$facets", ir); - tr.close(); - ir.close(); - iw.close(); - tw.close(); + IOUtils.close(tr, ir, iw, tw); IOUtils.close(dirs[0]); } @@ -111,12 +115,10 @@ RandomIndexWriter iw = new RandomIndexWriter(random(), dirs[0][0], newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false))); // create and open a taxonomy writer - TaxonomyWriter tw = new DirectoryTaxonomyWriter(dirs[0][1], - OpenMode.CREATE); + TaxonomyWriter tw = new DirectoryTaxonomyWriter(dirs[0][1], OpenMode.CREATE); PerDimensionIndexingParams iParams = new PerDimensionIndexingParams( - Collections.singletonMap(new CategoryPath("Author"), - new CategoryListParams(new Term("$author", "Authors")))); + Collections.singletonMap(new CategoryPath("Author"), new CategoryListParams("$author"))); seedIndex(iw, tw, iParams); IndexReader ir = iw.getReader(); @@ -133,13 +135,10 @@ // Obtain facets results and hand-test them assertCorrectResults(facetsCollector); - assertPostingListExists("$facets", "$fulltree$", ir); - assertPostingListExists("$author", "Authors", ir); + assertOrdinalsExist("$facets", ir); + assertOrdinalsExist("$author", ir); - tr.close(); - ir.close(); - iw.close(); - tw.close(); + IOUtils.close(tr, ir, iw, tw); 
IOUtils.close(dirs[0]); } @@ -150,12 +149,11 @@ RandomIndexWriter iw = new RandomIndexWriter(random(), dirs[0][0], newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false))); // create and open a taxonomy writer - TaxonomyWriter tw = new DirectoryTaxonomyWriter(dirs[0][1], - OpenMode.CREATE); + TaxonomyWriter tw = new DirectoryTaxonomyWriter(dirs[0][1], OpenMode.CREATE); Map paramsMap = new HashMap(); - paramsMap.put(new CategoryPath("Band"), new CategoryListParams(new Term("$music", "Bands"))); - paramsMap.put(new CategoryPath("Composer"), new CategoryListParams(new Term("$music", "Composers"))); + paramsMap.put(new CategoryPath("Band"), new CategoryListParams("$music")); + paramsMap.put(new CategoryPath("Composer"), new CategoryListParams("$music")); PerDimensionIndexingParams iParams = new PerDimensionIndexingParams(paramsMap); seedIndex(iw, tw, iParams); @@ -168,26 +166,27 @@ // prepare searcher to search against IndexSearcher searcher = newSearcher(ir); - FacetsCollector facetsCollector = performSearch(iParams, tr, ir, - searcher); + FacetsCollector facetsCollector = performSearch(iParams, tr, ir, searcher); // Obtain facets results and hand-test them assertCorrectResults(facetsCollector); - assertPostingListExists("$facets", "$fulltree$", ir); - assertPostingListExists("$music", "Bands", ir); - assertPostingListExists("$music", "Composers", ir); + assertOrdinalsExist("$facets", ir); + assertOrdinalsExist("$music", ir); + assertOrdinalsExist("$music", ir); - tr.close(); - ir.close(); - iw.close(); - tw.close(); + IOUtils.close(tr, ir, iw, tw); IOUtils.close(dirs[0]); } - private void assertPostingListExists(String field, String text, IndexReader ir) throws IOException { - DocsEnum de = _TestUtil.docs(random(), ir, field, new BytesRef(text), null, null, DocsEnum.FLAG_NONE); - assertTrue(de.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); + private void assertOrdinalsExist(String field, IndexReader ir) throws 
IOException { + for (AtomicReaderContext context : ir.leaves()) { + AtomicReader r = context.reader(); + if (r.docValues(field) != null) { + return; // not all segments must have this DocValues + } + } + fail("no ordinals found for " + field); } @Test @@ -200,8 +199,8 @@ TaxonomyWriter tw = new DirectoryTaxonomyWriter(dirs[0][1], OpenMode.CREATE); Map paramsMap = new HashMap(); - paramsMap.put(new CategoryPath("Band"), new CategoryListParams(new Term("$bands", "Bands"))); - paramsMap.put(new CategoryPath("Composer"), new CategoryListParams(new Term("$composers", "Composers"))); + paramsMap.put(new CategoryPath("Band"), new CategoryListParams("$bands")); + paramsMap.put(new CategoryPath("Composer"), new CategoryListParams("$composers")); PerDimensionIndexingParams iParams = new PerDimensionIndexingParams(paramsMap); seedIndex(iw, tw, iParams); @@ -214,18 +213,15 @@ // prepare searcher to search against IndexSearcher searcher = newSearcher(ir); - FacetsCollector facetsCollector = performSearch(iParams, tr, ir, - searcher); + FacetsCollector facetsCollector = performSearch(iParams, tr, ir, searcher); // Obtain facets results and hand-test them assertCorrectResults(facetsCollector); - assertPostingListExists("$facets", "$fulltree$", ir); - assertPostingListExists("$bands", "Bands", ir); - assertPostingListExists("$composers", "Composers", ir); - tr.close(); - ir.close(); - iw.close(); - tw.close(); + assertOrdinalsExist("$facets", ir); + assertOrdinalsExist("$bands", ir); + assertOrdinalsExist("$composers", ir); + + IOUtils.close(tr, ir, iw, tw); IOUtils.close(dirs[0]); } @@ -236,13 +232,12 @@ RandomIndexWriter iw = new RandomIndexWriter(random(), dirs[0][0], newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false))); // create and open a taxonomy writer - TaxonomyWriter tw = new DirectoryTaxonomyWriter(dirs[0][1], - OpenMode.CREATE); + TaxonomyWriter tw = new DirectoryTaxonomyWriter(dirs[0][1], OpenMode.CREATE); Map 
paramsMap = new HashMap(); - paramsMap.put(new CategoryPath("Band"), new CategoryListParams(new Term("$music", "music"))); - paramsMap.put(new CategoryPath("Composer"), new CategoryListParams(new Term("$music", "music"))); - paramsMap.put(new CategoryPath("Author"), new CategoryListParams(new Term("$literature", "Authors"))); + paramsMap.put(new CategoryPath("Band"), new CategoryListParams("$music")); + paramsMap.put(new CategoryPath("Composer"), new CategoryListParams("$music")); + paramsMap.put(new CategoryPath("Author"), new CategoryListParams("$literature")); PerDimensionIndexingParams iParams = new PerDimensionIndexingParams(paramsMap); seedIndex(iw, tw, iParams); @@ -256,18 +251,14 @@ // prepare searcher to search against IndexSearcher searcher = newSearcher(ir); - FacetsCollector facetsCollector = performSearch(iParams, tr, ir, - searcher); + FacetsCollector facetsCollector = performSearch(iParams, tr, ir, searcher); // Obtain facets results and hand-test them assertCorrectResults(facetsCollector); - assertPostingListExists("$music", "music", ir); - assertPostingListExists("$literature", "Authors", ir); + assertOrdinalsExist("$music", ir); + assertOrdinalsExist("$literature", ir); - tr.close(); - ir.close(); - iw.close(); - tw.close(); + IOUtils.close(tr, ir, iw, tw); IOUtils.close(dirs[0]); } @@ -275,14 +266,12 @@ return FacetTestUtils.createIndexTaxonomyDirs(1); } - private void assertCorrectResults(FacetsCollector facetsCollector) - throws IOException { + private void assertCorrectResults(FacetsCollector facetsCollector) throws IOException { List res = facetsCollector.getFacetResults(); FacetResult results = res.get(0); FacetResultNode resNode = results.getFacetResultNode(); - Iterable subResults = resNode - .getSubResults(); + Iterable subResults = resNode.getSubResults(); Iterator subIter = subResults.iterator(); checkResult(resNode, "Band", 5.0); @@ -325,9 +314,8 @@ checkResult(subIter.next(), "Band/Rock & Pop/The Beatles", 1.0); } - private 
FacetsCollector performSearch(FacetIndexingParams iParams, - TaxonomyReader tr, IndexReader ir, - IndexSearcher searcher) throws IOException { + private FacetsCollector performSearch(FacetIndexingParams iParams, TaxonomyReader tr, IndexReader ir, + IndexSearcher searcher) throws IOException { // step 1: collect matching documents into a collector Query q = new MatchAllDocsQuery(); TopScoreDocCollector topDocsCollector = TopScoreDocCollector.create(10, true); @@ -344,7 +332,6 @@ // Faceted search parameters indicate which facets are we interested in FacetSearchParams facetSearchParams = new FacetSearchParams(facetRequests, iParams); - // perform documents search and facets accumulation FacetsCollector facetsCollector = new FacetsCollector(facetSearchParams, ir, tr); @@ -352,27 +339,19 @@ return facetsCollector; } - private void seedIndex(RandomIndexWriter iw, TaxonomyWriter tw, - FacetIndexingParams iParams) throws IOException { - FacetTestUtils.add(iParams, iw, tw, "Author", "Mark Twain"); - FacetTestUtils.add(iParams, iw, tw, "Author", "Stephen King"); - FacetTestUtils.add(iParams, iw, tw, "Author", "Kurt Vonnegut"); - FacetTestUtils.add(iParams, iw, tw, "Band", "Rock & Pop", - "The Beatles"); - FacetTestUtils.add(iParams, iw, tw, "Band", "Punk", "The Ramones"); - FacetTestUtils.add(iParams, iw, tw, "Band", "Rock & Pop", "U2"); - FacetTestUtils.add(iParams, iw, tw, "Band", "Rock & Pop", "REM"); - FacetTestUtils.add(iParams, iw, tw, "Band", "Rock & Pop", - "Dave Matthews Band"); - FacetTestUtils.add(iParams, iw, tw, "Composer", "Bach"); + private void seedIndex(RandomIndexWriter iw, TaxonomyWriter tw, FacetIndexingParams iParams) throws IOException { + FacetFields facetFields = new FacetFields(tw, iParams); + for (CategoryPath cp : CATEGORIES) { + Document doc = new Document(); + facetFields.addFields(doc, Collections.singletonList(cp)); + doc.add(new TextField("content", "alpha", Field.Store.YES)); + iw.addDocument(doc); + } } private static void 
checkResult(FacetResultNode sub, String label, double value) { - assertEquals("Label of subresult " + sub.getLabel() + " was incorrect", - label, sub.getLabel().toString()); - assertEquals( - "Value for " + sub.getLabel() + " subresult was incorrect", - value, sub.getValue(), 0.0); + assertEquals("Label of subresult " + sub.getLabel() + " was incorrect", label, sub.getLabel().toString()); + assertEquals("Value for " + sub.getLabel() + " subresult was incorrect", value, sub.getValue(), 0.0); } } \ No newline at end of file Index: lucene/facet/src/test/org/apache/lucene/facet/search/TestStandardFacetsAccumulator.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/search/TestStandardFacetsAccumulator.java (revision 1433426) +++ lucene/facet/src/test/org/apache/lucene/facet/search/TestStandardFacetsAccumulator.java (working copy) @@ -93,6 +93,7 @@ indexTwoDocs(indexWriter, null, false); // 4th segment, no content, or categories indexTwoDocs(indexWriter, null, true); // 5th segment, with content, no categories indexTwoDocs(indexWriter, facetFields, true); // 6th segment, with content, with categories + indexTwoDocs(indexWriter, null, true); // 7th segment, with content, no categories IOUtils.close(indexWriter, taxoWriter); DirectoryReader indexReader = DirectoryReader.open(indexDir); Index: lucene/facet/src/test/org/apache/lucene/facet/search/params/FacetRequestTest.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/search/params/FacetRequestTest.java (revision 1433426) +++ lucene/facet/src/test/org/apache/lucene/facet/search/params/FacetRequestTest.java (working copy) @@ -32,12 +32,12 @@ @Test(expected=IllegalArgumentException.class) public void testIllegalNumResults() throws Exception { - new CountFacetRequest(new CategoryPath("a", "b"), 0); + assertNotNull(new CountFacetRequest(new CategoryPath("a", "b"), 0)); } 
@Test(expected=IllegalArgumentException.class) public void testIllegalCategoryPath() throws Exception { - new CountFacetRequest(null, 1); + assertNotNull(new CountFacetRequest(null, 1)); } @Test Index: lucene/facet/src/test/org/apache/lucene/facet/search/params/FacetSearchParamsTest.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/search/params/FacetSearchParamsTest.java (revision 1433426) +++ lucene/facet/src/test/org/apache/lucene/facet/search/params/FacetSearchParamsTest.java (working copy) @@ -1,13 +1,5 @@ package org.apache.lucene.facet.search.params; -import org.apache.lucene.facet.index.params.FacetIndexingParams; -import org.apache.lucene.facet.taxonomy.CategoryPath; -import org.apache.lucene.facet.taxonomy.TaxonomyReader; -import org.apache.lucene.facet.taxonomy.TaxonomyWriter; -import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader; -import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter; -import org.apache.lucene.facet.util.PartitionsUtils; -import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; import org.junit.Test; @@ -31,28 +23,6 @@ public class FacetSearchParamsTest extends LuceneTestCase { @Test - public void testAddFacetRequest() throws Exception { - FacetSearchParams fsp = new FacetSearchParams(new CountFacetRequest(new CategoryPath("a", "b"), 1)); - assertEquals("expected 1 facet request", 1, fsp.getFacetRequests().size()); - } - - @Test - public void testPartitionSizeWithCategories() throws Exception { - Directory dir = newDirectory(); - TaxonomyWriter tw = new DirectoryTaxonomyWriter(dir); - tw.addCategory(new CategoryPath("a")); - tw.commit(); - tw.close(); - TaxonomyReader tr = new DirectoryTaxonomyReader(dir); - assertEquals("unexpected partition offset for 1 categories", 2, - PartitionsUtils.partitionOffset(FacetIndexingParams.ALL_PARENTS, 1, tr)); - assertEquals("unexpected partition size for 1 
categories", 2, - PartitionsUtils.partitionSize(FacetIndexingParams.ALL_PARENTS,tr)); - tr.close(); - dir.close(); - } - - @Test public void testSearchParamsWithNullRequest() throws Exception { try { assertNull(new FacetSearchParams()); Index: lucene/facet/src/test/org/apache/lucene/facet/search/params/MultiCategoryListIteratorTest.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/search/params/MultiCategoryListIteratorTest.java (revision 1433426) +++ lucene/facet/src/test/org/apache/lucene/facet/search/params/MultiCategoryListIteratorTest.java (working copy) @@ -9,7 +9,7 @@ import org.apache.lucene.facet.index.params.CategoryListParams; import org.apache.lucene.facet.index.params.PerDimensionIndexingParams; import org.apache.lucene.facet.search.CategoryListIterator; -import org.apache.lucene.facet.search.PayloadCategoryListIteraor; +import org.apache.lucene.facet.search.DocValuesCategoryListIterator; import org.apache.lucene.facet.search.cache.CategoryListCache; import org.apache.lucene.facet.taxonomy.CategoryPath; import org.apache.lucene.facet.taxonomy.TaxonomyReader; @@ -20,7 +20,6 @@ import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.Term; import org.apache.lucene.store.Directory; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.IntsRef; @@ -60,7 +59,7 @@ HashMap clps = new HashMap(); for (String dim : dimensions) { CategoryPath cp = new CategoryPath(dim); - CategoryListParams clp = new CategoryListParams(new Term("$" + dim, CategoryListParams.DEFAULT_TERM.bytes())); + CategoryListParams clp = new CategoryListParams("$" + dim); clps.put(cp, clp); } PerDimensionIndexingParams indexingParams = new PerDimensionIndexingParams(clps); @@ -101,7 +100,7 @@ clCache.loadAndRegister(clp, indexReader, taxoReader, indexingParams); iterators[i] = 
clCache.get(clp).iterator(0); // no partitions } else { - iterators[i] = new PayloadCategoryListIteraor(clp.getTerm(), decoder); + iterators[i] = new DocValuesCategoryListIterator(clp.field, decoder); } } MultiCategoryListIterator cli = new MultiCategoryListIterator(iterators);