Index: lucene/CHANGES.txt =================================================================== --- lucene/CHANGES.txt (revision 1417812) +++ lucene/CHANGES.txt (working copy) @@ -64,6 +64,11 @@ even if the commitData is the only thing that changes. (Shai Erera, Michael McCandless) +* LUCENE-4565: TaxonomyReader.getParentArray and .getChildrenArrays consolidated + into one getParallelTaxonomyArrays(). You can obtain the 3 arrays that the + previous two methods returned by calling parents(), children() or siblings() + on the returned ParallelTaxonomyArrays. (Shai Erera) + New Features * LUCENE-4226: New experimental StoredFieldsFormat that compresses chunks of Index: lucene/facet/src/java/org/apache/lucene/facet/search/TopKFacetResultsHandler.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/TopKFacetResultsHandler.java (revision 1417812) +++ lucene/facet/src/java/org/apache/lucene/facet/search/TopKFacetResultsHandler.java (working copy) @@ -6,10 +6,10 @@ import org.apache.lucene.facet.search.params.FacetRequest; import org.apache.lucene.facet.search.results.FacetResult; import org.apache.lucene.facet.search.results.FacetResultNode; +import org.apache.lucene.facet.search.results.IntermediateFacetResult; import org.apache.lucene.facet.search.results.MutableFacetResultNode; -import org.apache.lucene.facet.search.results.IntermediateFacetResult; -import org.apache.lucene.facet.taxonomy.ChildrenArrays; import org.apache.lucene.facet.taxonomy.TaxonomyReader; +import org.apache.lucene.facet.taxonomy.directory.ParallelTaxonomyArrays; import org.apache.lucene.facet.util.ResultSortUtils; /* @@ -123,9 +123,9 @@ MutableFacetResultNode parentResultNode, FacetArrays facetArrays, int offset) throws IOException { int partitionSize = facetArrays.getArraysLength(); int endOffset = offset + partitionSize; - ChildrenArrays childrenArray = taxonomyReader.getChildrenArrays(); - int[] youngestChild = 
childrenArray.getYoungestChildArray(); - int[] olderSibling = childrenArray.getOlderSiblingArray(); + ParallelTaxonomyArrays childrenArray = taxonomyReader.getParallelTaxonomyArrays(); + int[] children = childrenArray.children(); + int[] siblings = childrenArray.siblings(); FacetResultNode reusable = null; int localDepth = 0; int depth = facetRequest.getDepth(); @@ -134,9 +134,9 @@ int tosOrdinal; // top of stack element - int yc = youngestChild[ordinal]; + int yc = children[ordinal]; while (yc >= endOffset) { - yc = olderSibling[yc]; + yc = siblings[yc]; } // make use of the fact that TaxonomyReader.INVALID_ORDINAL == -1, < endOffset // and it, too, can stop the loop. @@ -161,7 +161,7 @@ // need to proceed to its sibling localDepth--; // change element now on top of stack to its sibling. - ordinalStack[localDepth] = olderSibling[ordinalStack[localDepth]]; + ordinalStack[localDepth] = siblings[ordinalStack[localDepth]]; continue; } // top of stack is not invalid, this is the first time we see it on top of stack. 
@@ -187,9 +187,9 @@ } if (localDepth < depth) { // push kid of current tos - yc = youngestChild[tosOrdinal]; + yc = children[tosOrdinal]; while (yc >= endOffset) { - yc = olderSibling[yc]; + yc = siblings[yc]; } ordinalStack[++localDepth] = yc; } else { // localDepth == depth; current tos exhausted its possible children, mark this by pushing INVALID_ORDINAL Index: lucene/facet/src/java/org/apache/lucene/facet/search/TopKInEachNodeHandler.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/TopKInEachNodeHandler.java (revision 1417812) +++ lucene/facet/src/java/org/apache/lucene/facet/search/TopKInEachNodeHandler.java (working copy) @@ -4,16 +4,15 @@ import java.util.ArrayList; import java.util.List; -import org.apache.lucene.util.PriorityQueue; - import org.apache.lucene.facet.search.params.FacetRequest; import org.apache.lucene.facet.search.params.FacetRequest.SortOrder; import org.apache.lucene.facet.search.results.FacetResult; import org.apache.lucene.facet.search.results.FacetResultNode; +import org.apache.lucene.facet.search.results.IntermediateFacetResult; import org.apache.lucene.facet.search.results.MutableFacetResultNode; -import org.apache.lucene.facet.search.results.IntermediateFacetResult; -import org.apache.lucene.facet.taxonomy.ChildrenArrays; import org.apache.lucene.facet.taxonomy.TaxonomyReader; +import org.apache.lucene.facet.taxonomy.directory.ParallelTaxonomyArrays; +import org.apache.lucene.util.PriorityQueue; import org.apache.lucene.util.collections.IntIterator; import org.apache.lucene.util.collections.IntToObjectMap; @@ -141,9 +140,9 @@ } int endOffset = offset + partitionSize; // one past the largest ordinal in the partition - ChildrenArrays childrenArray = taxonomyReader.getChildrenArrays(); - int[] youngestChild = childrenArray.getYoungestChildArray(); - int[] olderSibling = childrenArray.getOlderSiblingArray(); + ParallelTaxonomyArrays childrenArray = 
taxonomyReader.getParallelTaxonomyArrays(); + int[] children = childrenArray.children(); + int[] siblings = childrenArray.siblings(); int totalNumOfDescendantsConsidered = 0; // total number of facets with value != 0, // in the tree. These include those selected as top K in each node, and all the others that // were not. Not including rootNode @@ -217,7 +216,7 @@ * we can continue to the older sibling of rootNode once the localDepth goes down, before we verify that * it went that down) */ - ordinalStack[++localDepth] = youngestChild[rootNode]; + ordinalStack[++localDepth] = children[rootNode]; siblingExplored[localDepth] = Integer.MAX_VALUE; // we have not verified position wrt current partition siblingExplored[0] = -1; // as if rootNode resides to the left of current position @@ -238,7 +237,7 @@ // its child, now just removed, would not have been pushed on it. // so the father is either inside the partition, or smaller ordinal if (siblingExplored[localDepth] < 0 ) { - ordinalStack[localDepth] = olderSibling[ordinalStack[localDepth]]; + ordinalStack[localDepth] = siblings[ordinalStack[localDepth]]; continue; } // in this point, siblingExplored[localDepth] between 0 and number of bestSiblings @@ -264,7 +263,7 @@ //tosOrdinal was not examined yet for its position relative to current partition // and the best K of current partition, among its siblings, have not been determined yet while (tosOrdinal >= endOffset) { - tosOrdinal = olderSibling[tosOrdinal]; + tosOrdinal = siblings[tosOrdinal]; } // now it is inside. Run it and all its siblings inside the partition through a heap // and in doing so, count them, find best K, and sum into residue @@ -297,12 +296,12 @@ // update totalNumOfDescendants by the now excluded node and all its descendants totalNumOfDescendantsConsidered--; // reduce the 1 earned when the excluded node entered the heap // and now return it and all its descendants. 
These will never make it to FacetResult - totalNumOfDescendantsConsidered += countOnly (ac.ordinal, youngestChild, - olderSibling, arrays, partitionSize, offset, endOffset, localDepth, depth); + totalNumOfDescendantsConsidered += countOnly (ac.ordinal, children, + siblings, arrays, partitionSize, offset, endOffset, localDepth, depth); reusables[++tosReuslables] = ac; } } - tosOrdinal = olderSibling[tosOrdinal]; + tosOrdinal = siblings[tosOrdinal]; } // now pq has best K children of ordinals that belong to the given partition. // Populate a new AACO with them. @@ -343,7 +342,7 @@ ordinalStack[++localDepth] = TaxonomyReader.INVALID_ORDINAL; continue; } - ordinalStack[++localDepth] = youngestChild[tosOrdinal]; + ordinalStack[++localDepth] = children[tosOrdinal]; siblingExplored[localDepth] = Integer.MAX_VALUE; } // endof loop while stack is not empty Index: lucene/facet/src/java/org/apache/lucene/facet/taxonomy/ChildrenArrays.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/taxonomy/ChildrenArrays.java (revision 1417812) +++ lucene/facet/src/java/org/apache/lucene/facet/taxonomy/ChildrenArrays.java (working copy) @@ -1,87 +0,0 @@ -package org.apache.lucene.facet.taxonomy; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * Equivalent representations of the taxonomy's parent info, - * used internally for efficient computation of facet results: - * "youngest child" and "oldest sibling" - */ -public class ChildrenArrays { - - private final int[] youngestChild, olderSibling; - - public ChildrenArrays(int[] parents) { - this(parents, null); - } - - public ChildrenArrays(int[] parents, ChildrenArrays copyFrom) { - youngestChild = new int[parents.length]; - olderSibling = new int[parents.length]; - int first = 0; - if (copyFrom != null) { - System.arraycopy(copyFrom.getYoungestChildArray(), 0, youngestChild, 0, copyFrom.getYoungestChildArray().length); - System.arraycopy(copyFrom.getOlderSiblingArray(), 0, olderSibling, 0, copyFrom.getOlderSiblingArray().length); - first = copyFrom.getOlderSiblingArray().length; - } - computeArrays(parents, first); - } - - private void computeArrays(int[] parents, int first) { - // reset the youngest child of all ordinals. while this should be done only - // for the leaves, we don't know up front which are the leaves, so we reset - // all of them. - for (int i = first; i < parents.length; i++) { - youngestChild[i] = TaxonomyReader.INVALID_ORDINAL; - } - - // the root category has no parent, and therefore no siblings - if (first == 0) { - first = 1; - olderSibling[0] = TaxonomyReader.INVALID_ORDINAL; - } - - for (int i = first; i < parents.length; i++) { - // note that parents[i] is always < i, so the right-hand-side of - // the following line is already set when we get here - olderSibling[i] = youngestChild[parents[i]]; - youngestChild[parents[i]] = i; - } - } - - /** - * Returns an {@code int[]} the size of the taxonomy listing for each category - * the ordinal of its immediate older sibling (the sibling in the taxonomy - * tree with the highest ordinal below that of the given ordinal). 
The value - * for a category with no older sibling is {@link TaxonomyReader#INVALID_ORDINAL}. - */ - public int[] getOlderSiblingArray() { - return olderSibling; - } - - /** - * Returns an {@code int[]} the size of the taxonomy listing the ordinal of - * the youngest (highest numbered) child category of each category in the - * taxonomy. The value for a leaf category (a category without children) is - * {@link TaxonomyReader#INVALID_ORDINAL}. - */ - public int[] getYoungestChildArray() { - return youngestChild; - } - -} Index: lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyReader.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyReader.java (revision 1417812) +++ lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyReader.java (working copy) @@ -5,6 +5,7 @@ import java.util.Map; import java.util.concurrent.atomic.AtomicInteger; +import org.apache.lucene.facet.taxonomy.directory.ParallelTaxonomyArrays; import org.apache.lucene.store.AlreadyClosedException; /* @@ -162,7 +163,7 @@ } /** - * Returns a {@link ChildrenArrays} object which can be used together to + * Returns a {@link ParallelTaxonomyArrays} object which can be used to * efficiently enumerate the children of any category. *
* The caller can hold on to the object it got indefinitely - it is guaranteed @@ -171,7 +172,7 @@ * as read-only and not modify it, because other callers might have * gotten the same object too. */ - public abstract ChildrenArrays getChildrenArrays() throws IOException; + public abstract ParallelTaxonomyArrays getParallelTaxonomyArrays() throws IOException; /** * Retrieve user committed data. @@ -195,7 +196,6 @@ * Returns the ordinal of the parent category of the category with the given * ordinal, according to the following rules: * - * *
- * The caller can hold on to the array it got indefinitely - it is guaranteed
- * that no-one else will modify it. The other side of the same coin is that
- * the caller must treat the array it got as read-only and not modify
- * it, because other callers might have gotten the same array too (and
- * getParent() calls might be answered from the same array).
- */
- public abstract int[] getParentArray() throws IOException;
-
- /**
* Returns the path name of the category with the given ordinal. The path is
* returned as a new CategoryPath object - to reuse an existing object, use
* {@link #getPath(int, CategoryPath)}.
Index: lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java
===================================================================
--- lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java (revision 1417812)
+++ lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java (working copy)
@@ -6,7 +6,6 @@
import java.util.logging.Logger;
import org.apache.lucene.facet.taxonomy.CategoryPath;
-import org.apache.lucene.facet.taxonomy.ChildrenArrays;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.Consts.LoadFullPathOnly;
import org.apache.lucene.index.CorruptIndexException;
@@ -63,9 +62,7 @@
private LRUHashMap
+ * NOTE: you are not expected to modify the values of the arrays, since
+ * the arrays are shared with other threads.
+ *
* @lucene.experimental
*/
-class ParentArray {
+public class ParallelTaxonomyArrays {
- // TODO: maybe use PackedInts?
- private final int[] parentOrdinals;
+ private final int[] parents;
- /** Used by {@link #add(int, int)} when the array needs to grow. */
- ParentArray(int[] parentOrdinals) {
- this.parentOrdinals = parentOrdinals;
+ // the following two arrays are lazily initialized. note that we only keep a
+ // single boolean member as volatile, instead of declaring the arrays
+ // volatile. the code guarantees that only after the boolean is set to true,
+ // the arrays are returned.
+ private volatile boolean initializedChildren = false;
+ private int[] children, siblings;
+
+ /** Used by {@link #add(int, int)} after the array grew. */
+ private ParallelTaxonomyArrays(int[] parents) {
+ this.parents = parents;
}
- public ParentArray(IndexReader reader) throws IOException {
- parentOrdinals = new int[reader.maxDoc()];
- if (parentOrdinals.length > 0) {
- initFromReader(reader, 0);
+ public ParallelTaxonomyArrays(IndexReader reader) throws IOException {
+ parents = new int[reader.maxDoc()];
+ if (parents.length > 0) {
+ initParents(reader, 0);
// Starting Lucene 2.9, following the change LUCENE-1542, we can
// no longer reliably read the parent "-1" (see comment in
// LuceneTaxonomyWriter.SinglePositionTokenStream). We have no way
@@ -51,25 +73,65 @@
// with existing indexes, so what we'll do instead is just
// hard-code the parent of ordinal 0 to be -1, and assume (as is
// indeed the case) that no other parent can be -1.
- parentOrdinals[0] = TaxonomyReader.INVALID_ORDINAL;
+ parents[0] = TaxonomyReader.INVALID_ORDINAL;
}
}
- public ParentArray(IndexReader reader, ParentArray copyFrom) throws IOException {
+ public ParallelTaxonomyArrays(IndexReader reader, ParallelTaxonomyArrays copyFrom) throws IOException {
assert copyFrom != null;
// note that copyParents.length may be equal to reader.maxDoc(). this is not a bug
// it may be caused if e.g. the taxonomy segments were merged, and so an updated
// NRT reader was obtained, even though nothing was changed. this is not very likely
// to happen.
- int[] copyParents = copyFrom.getArray();
- this.parentOrdinals = new int[reader.maxDoc()];
- System.arraycopy(copyParents, 0, parentOrdinals, 0, copyParents.length);
- initFromReader(reader, copyParents.length);
+ int[] copyParents = copyFrom.parents();
+ this.parents = new int[reader.maxDoc()];
+ System.arraycopy(copyParents, 0, parents, 0, copyParents.length);
+ initParents(reader, copyParents.length);
+
+ if (copyFrom.initializedChildren) {
+ initChildrenSiblings(copyFrom);
+ }
}
+ private final synchronized void initChildrenSiblings(ParallelTaxonomyArrays copyFrom) {
+ if (!initializedChildren) { // must do this check !
+ children = new int[parents.length];
+ siblings = new int[parents.length];
+ if (copyFrom != null) {
+ // called from the ctor, after we know copyFrom has initialized children/siblings
+ System.arraycopy(copyFrom.children(), 0, children, 0, copyFrom.children().length);
+ System.arraycopy(copyFrom.siblings(), 0, siblings, 0, copyFrom.siblings().length);
+ }
+ computeChildrenSiblings(parents, 0);
+ initializedChildren = true;
+ }
+ }
+
+ private void computeChildrenSiblings(int[] parents, int first) {
+ // reset the youngest child of all ordinals. while this should be done only
+ // for the leaves, we don't know up front which are the leaves, so we reset
+ // all of them.
+ for (int i = first; i < parents.length; i++) {
+ children[i] = TaxonomyReader.INVALID_ORDINAL;
+ }
+
+ // the root category has no parent, and therefore no siblings
+ if (first == 0) {
+ first = 1;
+ siblings[0] = TaxonomyReader.INVALID_ORDINAL;
+ }
+
+ for (int i = first; i < parents.length; i++) {
+ // note that parents[i] is always < i, so the right-hand-side of
+ // the following line is already set when we get here
+ siblings[i] = children[parents[i]];
+ children[parents[i]] = i;
+ }
+ }
+
// Read the parents of the new categories
- private void initFromReader(IndexReader reader, int first) throws IOException {
+ private void initParents(IndexReader reader, int first) throws IOException {
if (reader.maxDoc() == first) {
return;
}
@@ -93,7 +155,7 @@
throw new CorruptIndexException("Missing parent data for category " + i);
}
- parentOrdinals[i] = positions.nextPosition();
+ parents[i] = positions.nextPosition();
if (positions.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) {
if (i + 1 < num) {
@@ -107,24 +169,57 @@
}
}
- public int[] getArray() {
- return parentOrdinals;
- }
-
/**
* Adds the given ordinal/parent info and returns either a new instance if the
* underlying array had to grow, or this instance otherwise.
*
* NOTE: you should call this method from a thread-safe code.
*/
- ParentArray add(int ordinal, int parentOrdinal) {
- if (ordinal >= parentOrdinals.length) {
- int[] newarray = ArrayUtil.grow(parentOrdinals);
+ ParallelTaxonomyArrays add(int ordinal, int parentOrdinal) {
+ if (ordinal >= parents.length) {
+ int[] newarray = ArrayUtil.grow(parents);
newarray[ordinal] = parentOrdinal;
- return new ParentArray(newarray);
+ return new ParallelTaxonomyArrays(newarray);
}
- parentOrdinals[ordinal] = parentOrdinal;
+ parents[ordinal] = parentOrdinal;
return this;
}
+
+ /**
+ * Returns the parents array, where {@code parents[i]} denotes the parent of
+ * category ordinal {@code i}.
+ */
+ public int[] parents() {
+ return parents;
+ }
+
+ /**
+ * Returns the children array, where {@code children[i]} denotes the youngest
+ * child of category ordinal {@code i}. The youngest child is defined as the
+ * category that was added last to the taxonomy as an immediate child of
+ * {@code i}.
+ */
+ public int[] children() {
+ if (!initializedChildren) {
+ initChildrenSiblings(null);
+ }
+
+ // the array is guaranteed to be populated
+ return children;
+ }
+
+ /**
+ * Returns the siblings array, where {@code siblings[i]} denotes the sibling
+ * of category ordinal {@code i}. The sibling is defined as the previous
+ * youngest child of {@code parents[i]}.
+ */
+ public int[] siblings() {
+ if (!initializedChildren) {
+ initChildrenSiblings(null);
+ }
+
+ // the array is guaranteed to be populated
+ return siblings;
+ }
}
Index: lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/ParentArray.java
===================================================================
--- lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/ParentArray.java (revision 1417812)
+++ lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/ParentArray.java (working copy)
@@ -1,130 +0,0 @@
-package org.apache.lucene.facet.taxonomy.directory;
-
-import java.io.IOException;
-
-import org.apache.lucene.facet.taxonomy.TaxonomyReader;
-import org.apache.lucene.index.CorruptIndexException;
-import org.apache.lucene.index.DocsAndPositionsEnum;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.MultiFields;
-import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.util.ArrayUtil;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @lucene.experimental
- */
-class ParentArray {
-
- // TODO: maybe use PackedInts?
- private final int[] parentOrdinals;
-
- /** Used by {@link #add(int, int)} when the array needs to grow. */
- ParentArray(int[] parentOrdinals) {
- this.parentOrdinals = parentOrdinals;
- }
-
- public ParentArray(IndexReader reader) throws IOException {
- parentOrdinals = new int[reader.maxDoc()];
- if (parentOrdinals.length > 0) {
- initFromReader(reader, 0);
- // Starting Lucene 2.9, following the change LUCENE-1542, we can
- // no longer reliably read the parent "-1" (see comment in
- // LuceneTaxonomyWriter.SinglePositionTokenStream). We have no way
- // to fix this in indexing without breaking backward-compatibility
- // with existing indexes, so what we'll do instead is just
- // hard-code the parent of ordinal 0 to be -1, and assume (as is
- // indeed the case) that no other parent can be -1.
- parentOrdinals[0] = TaxonomyReader.INVALID_ORDINAL;
- }
- }
-
- public ParentArray(IndexReader reader, ParentArray copyFrom) throws IOException {
- assert copyFrom != null;
-
- // note that copyParents.length may be equal to reader.maxDoc(). this is not a bug
- // it may be caused if e.g. the taxonomy segments were merged, and so an updated
- // NRT reader was obtained, even though nothing was changed. this is not very likely
- // to happen.
- int[] copyParents = copyFrom.getArray();
- this.parentOrdinals = new int[reader.maxDoc()];
- System.arraycopy(copyParents, 0, parentOrdinals, 0, copyParents.length);
- initFromReader(reader, copyParents.length);
- }
-
- // Read the parents of the new categories
- private void initFromReader(IndexReader reader, int first) throws IOException {
- if (reader.maxDoc() == first) {
- return;
- }
-
- // it's ok to use MultiFields because we only iterate on one posting list.
- // breaking it to loop over the leaves() only complicates code for no
- // apparent gain.
- DocsAndPositionsEnum positions = MultiFields.getTermPositionsEnum(reader, null,
- Consts.FIELD_PAYLOADS, Consts.PAYLOAD_PARENT_BYTES_REF,
- DocsAndPositionsEnum.FLAG_PAYLOADS);
-
- // shouldn't really happen, if it does, something's wrong
- if (positions == null || positions.advance(first) == DocIdSetIterator.NO_MORE_DOCS) {
- throw new CorruptIndexException("Missing parent data for category " + first);
- }
-
- int num = reader.maxDoc();
- for (int i = first; i < num; i++) {
- if (positions.docID() == i) {
- if (positions.freq() == 0) { // shouldn't happen
- throw new CorruptIndexException("Missing parent data for category " + i);
- }
-
- parentOrdinals[i] = positions.nextPosition();
-
- if (positions.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) {
- if (i + 1 < num) {
- throw new CorruptIndexException("Missing parent data for category "+ (i + 1));
- }
- break;
- }
- } else { // this shouldn't happen
- throw new CorruptIndexException("Missing parent data for category " + i);
- }
- }
- }
-
- public int[] getArray() {
- return parentOrdinals;
- }
-
- /**
- * Adds the given ordinal/parent info and returns either a new instance if the
- * underlying array had to grow, or this instance otherwise.
- *
- * NOTE: you should call this method from a thread-safe code.
- */
- ParentArray add(int ordinal, int parentOrdinal) {
- if (ordinal >= parentOrdinals.length) {
- int[] newarray = ArrayUtil.grow(parentOrdinals);
- newarray[ordinal] = parentOrdinal;
- return new ParentArray(newarray);
- }
- parentOrdinals[ordinal] = parentOrdinal;
- return this;
- }
-
-}
Index: lucene/facet/src/test/org/apache/lucene/facet/search/params/MultiIteratorsPerCLParamsTest.java
===================================================================
--- lucene/facet/src/test/org/apache/lucene/facet/search/params/MultiIteratorsPerCLParamsTest.java (revision 1417812)
+++ lucene/facet/src/test/org/apache/lucene/facet/search/params/MultiIteratorsPerCLParamsTest.java (working copy)
@@ -232,13 +232,15 @@
CategoryPath cp = new CategoryPath(requestedPath.getComponent(0));
parentOrdinal = taxo.getOrdinal(cp);
}
- parentArray = taxo.getParentArray();
+ parentArray = taxo.getParallelTaxonomyArrays().parents();
}
+ @Override
public boolean init() throws IOException {
return superCLI.init();
}
+ @Override
public long nextCategory() throws IOException {
long next;
while ((next = superCLI.nextCategory()) <= Integer.MAX_VALUE
@@ -259,6 +261,7 @@
return false;
}
+ @Override
public boolean skipTo(int docId) throws IOException {
return superCLI.skipTo(docId);
}
Index: lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyCombined.java
===================================================================
--- lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyCombined.java (revision 1417812)
+++ lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyCombined.java (working copy)
@@ -9,6 +9,7 @@
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
+import org.apache.lucene.facet.taxonomy.directory.ParallelTaxonomyArrays;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.RAMDirectory;
@@ -545,7 +546,7 @@
fillTaxonomy(tw);
tw.close();
TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
- int[] parents = tr.getParentArray();
+ int[] parents = tr.getParallelTaxonomyArrays().parents();
assertEquals(tr.getSize(), parents.length);
for (int i=0; isuper.closeResources() call in your implementation.
*/
protected synchronized void closeResources() throws IOException {
- if (readerManager != null) {
+ if (initializedReaderManager) {
readerManager.close();
readerManager = null;
+ initializedReaderManager = false;
}
if (cache != null) {
cache.close();
@@ -467,15 +469,19 @@
int doc = -1;
DirectoryReader reader = readerManager.acquire();
try {
+ TermsEnum termsEnum = null; // reuse
+ DocsEnum docs = null; // reuse
final BytesRef catTerm = new BytesRef(categoryPath.toString(delimiter, prefixLen));
for (AtomicReaderContext ctx : reader.leaves()) {
Terms terms = ctx.reader().terms(Consts.FULL);
if (terms != null) {
- TermsEnum termsEnum = terms.iterator(null);
+ termsEnum = terms.iterator(termsEnum);
if (termsEnum.seekExact(catTerm, true)) {
- // TODO: is it really ok that null is passed here as liveDocs?
- DocsEnum docs = termsEnum.docs(null, null, 0);
+ // liveDocs=null because the taxonomy has no deletes
+ docs = termsEnum.docs(null, docs, 0 /* freqs not required */);
+ // if the term was found, we know it has exactly one document.
doc = docs.nextDoc() + ctx.docBase;
+ break;
}
}
}
@@ -589,7 +595,7 @@
addToCache(categoryPath, length, id);
// also add to the parent array
- parentArray = getParentArray().add(id, parent);
+ taxoArrays = getTaxoArrays().add(id, parent);
return id;
}
@@ -657,7 +663,7 @@
// NOTE: since this method is sync'ed, it can call maybeRefresh, instead of
// maybeRefreshBlocking. If ever this is changed, make sure to change the
// call too.
- if (shouldRefreshReaderManager && readerManager != null) {
+ if (shouldRefreshReaderManager && initializedReaderManager) {
readerManager.maybeRefresh();
shouldRefreshReaderManager = false;
}
@@ -791,25 +797,30 @@
// initReaderManager called in parallel.
readerManager.close();
readerManager = null;
+ initializedReaderManager = false;
}
}
}
- private ParentArray getParentArray() throws IOException {
- if (parentArray == null) {
+ private ParallelTaxonomyArrays getTaxoArrays() throws IOException {
+ if (taxoArrays == null) {
synchronized (this) {
- if (parentArray == null) {
+ if (taxoArrays == null) {
initReaderManager();
DirectoryReader reader = readerManager.acquire();
try {
- parentArray = new ParentArray(reader);
+ // according to Java Concurrency, this might perform better on some
+ // JVMs, since the object initialization doesn't happen on the
+ // volatile member.
+ ParallelTaxonomyArrays tmpArrays = new ParallelTaxonomyArrays(reader);
+ taxoArrays = tmpArrays;
} finally {
readerManager.release(reader);
}
}
}
}
- return parentArray;
+ return taxoArrays;
}
@Override
@@ -821,7 +832,7 @@
if (ordinal >= nextID) {
throw new ArrayIndexOutOfBoundsException("requested ordinal is bigger than the largest ordinal in the taxonomy");
}
- return getParentArray().getArray()[ordinal];
+ return getTaxoArrays().parents()[ordinal];
}
/**
Index: lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/ParallelTaxonomyArrays.java
===================================================================
--- lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/ParallelTaxonomyArrays.java (working copy)
+++ lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/ParallelTaxonomyArrays.java (working copy)
@@ -28,22 +28,44 @@
*/
/**
+ * Returns 3 arrays for traversing the taxonomy:
+ *
+ *
+ *
+ *