Index: lucene/CHANGES.txt =================================================================== --- lucene/CHANGES.txt (revision 1417812) +++ lucene/CHANGES.txt (working copy) @@ -64,6 +64,11 @@ even if the commitData is the only thing that changes. (Shai Erera, Michael McCandless) +* LUCENE-4565: TaxonomyReader.getParentArray and .getChildrenArrays consolidated + into one getParallelTaxonomyArrays(). You can obtain the 3 arrays that the + previous two methods returned by calling parents(), children() or siblings() + on the returned ParallelTaxonomyArrays. (Shai Erera) + New Features * LUCENE-4226: New experimental StoredFieldsFormat that compresses chunks of Index: lucene/facet/src/java/org/apache/lucene/facet/search/TopKFacetResultsHandler.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/TopKFacetResultsHandler.java (revision 1417812) +++ lucene/facet/src/java/org/apache/lucene/facet/search/TopKFacetResultsHandler.java (working copy) @@ -6,10 +6,10 @@ import org.apache.lucene.facet.search.params.FacetRequest; import org.apache.lucene.facet.search.results.FacetResult; import org.apache.lucene.facet.search.results.FacetResultNode; +import org.apache.lucene.facet.search.results.IntermediateFacetResult; import org.apache.lucene.facet.search.results.MutableFacetResultNode; -import org.apache.lucene.facet.search.results.IntermediateFacetResult; -import org.apache.lucene.facet.taxonomy.ChildrenArrays; import org.apache.lucene.facet.taxonomy.TaxonomyReader; +import org.apache.lucene.facet.taxonomy.directory.ParallelTaxonomyArrays; import org.apache.lucene.facet.util.ResultSortUtils; /* @@ -123,9 +123,9 @@ MutableFacetResultNode parentResultNode, FacetArrays facetArrays, int offset) throws IOException { int partitionSize = facetArrays.getArraysLength(); int endOffset = offset + partitionSize; - ChildrenArrays childrenArray = taxonomyReader.getChildrenArrays(); - int[] youngestChild = childrenArray.getYoungestChildArray(); - int[] olderSibling = childrenArray.getOlderSiblingArray(); + ParallelTaxonomyArrays childrenArray = taxonomyReader.getParallelTaxonomyArrays(); + int[] children = childrenArray.children(); + int[] siblings = childrenArray.siblings(); FacetResultNode reusable = null; int localDepth = 0; int depth = facetRequest.getDepth(); @@ -134,9 +134,9 @@ int tosOrdinal; // top of stack element - int yc = youngestChild[ordinal]; + int yc = children[ordinal]; while (yc >= endOffset) { - yc = olderSibling[yc]; + yc = siblings[yc]; } // make use of the fact that TaxonomyReader.INVALID_ORDINAL == -1, < endOffset // and it, too, can stop the loop. @@ -161,7 +161,7 @@ // need to proceed to its sibling localDepth--; // change element now on top of stack to its sibling. - ordinalStack[localDepth] = olderSibling[ordinalStack[localDepth]]; + ordinalStack[localDepth] = siblings[ordinalStack[localDepth]]; continue; } // top of stack is not invalid, this is the first time we see it on top of stack. @@ -187,9 +187,9 @@ } if (localDepth < depth) { // push kid of current tos - yc = youngestChild[tosOrdinal]; + yc = children[tosOrdinal]; while (yc >= endOffset) { - yc = olderSibling[yc]; + yc = siblings[yc]; } ordinalStack[++localDepth] = yc; } else { // localDepth == depth; current tos exhausted its possible children, mark this by pushing INVALID_ORDINAL Index: lucene/facet/src/java/org/apache/lucene/facet/search/TopKInEachNodeHandler.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/TopKInEachNodeHandler.java (revision 1417812) +++ lucene/facet/src/java/org/apache/lucene/facet/search/TopKInEachNodeHandler.java (working copy) @@ -4,16 +4,15 @@ import java.util.ArrayList; import java.util.List; -import org.apache.lucene.util.PriorityQueue; - import org.apache.lucene.facet.search.params.FacetRequest; import org.apache.lucene.facet.search.params.FacetRequest.SortOrder; import org.apache.lucene.facet.search.results.FacetResult; import org.apache.lucene.facet.search.results.FacetResultNode; +import org.apache.lucene.facet.search.results.IntermediateFacetResult; import org.apache.lucene.facet.search.results.MutableFacetResultNode; -import org.apache.lucene.facet.search.results.IntermediateFacetResult; -import org.apache.lucene.facet.taxonomy.ChildrenArrays; import org.apache.lucene.facet.taxonomy.TaxonomyReader; +import org.apache.lucene.facet.taxonomy.directory.ParallelTaxonomyArrays; +import org.apache.lucene.util.PriorityQueue; import org.apache.lucene.util.collections.IntIterator; import org.apache.lucene.util.collections.IntToObjectMap; @@ -141,9 +140,9 @@ } int endOffset = offset + partitionSize; // one past the largest ordinal in the partition - ChildrenArrays childrenArray = taxonomyReader.getChildrenArrays(); - int[] youngestChild = childrenArray.getYoungestChildArray(); - int[] olderSibling = childrenArray.getOlderSiblingArray(); + ParallelTaxonomyArrays childrenArray = taxonomyReader.getParallelTaxonomyArrays(); + int[] children = childrenArray.children(); + int[] siblings = childrenArray.siblings(); int totalNumOfDescendantsConsidered = 0; // total number of facets with value != 0, // in the tree. These include those selected as top K in each node, and all the others that // were not. Not including rootNode @@ -217,7 +216,7 @@ * we can continue to the older sibling of rootNode once the localDepth goes down, before we verify that * it went that down) */ - ordinalStack[++localDepth] = youngestChild[rootNode]; + ordinalStack[++localDepth] = children[rootNode]; siblingExplored[localDepth] = Integer.MAX_VALUE; // we have not verified position wrt current partition siblingExplored[0] = -1; // as if rootNode resides to the left of current position @@ -238,7 +237,7 @@ // its child, now just removed, would not have been pushed on it. // so the father is either inside the partition, or smaller ordinal if (siblingExplored[localDepth] < 0 ) { - ordinalStack[localDepth] = olderSibling[ordinalStack[localDepth]]; + ordinalStack[localDepth] = siblings[ordinalStack[localDepth]]; continue; } // in this point, siblingExplored[localDepth] between 0 and number of bestSiblings @@ -264,7 +263,7 @@ //tosOrdinal was not examined yet for its position relative to current partition // and the best K of current partition, among its siblings, have not been determined yet while (tosOrdinal >= endOffset) { - tosOrdinal = olderSibling[tosOrdinal]; + tosOrdinal = siblings[tosOrdinal]; } // now it is inside. Run it and all its siblings inside the partition through a heap // and in doing so, count them, find best K, and sum into residue @@ -297,12 +296,12 @@ // update totalNumOfDescendants by the now excluded node and all its descendants totalNumOfDescendantsConsidered--; // reduce the 1 earned when the excluded node entered the heap // and now return it and all its descendants. These will never make it to FacetResult - totalNumOfDescendantsConsidered += countOnly (ac.ordinal, youngestChild, - olderSibling, arrays, partitionSize, offset, endOffset, localDepth, depth); + totalNumOfDescendantsConsidered += countOnly (ac.ordinal, children, + siblings, arrays, partitionSize, offset, endOffset, localDepth, depth); reusables[++tosReuslables] = ac; } } - tosOrdinal = olderSibling[tosOrdinal]; + tosOrdinal = siblings[tosOrdinal]; } // now pq has best K children of ordinals that belong to the given partition. // Populate a new AACO with them. @@ -343,7 +342,7 @@ ordinalStack[++localDepth] = TaxonomyReader.INVALID_ORDINAL; continue; } - ordinalStack[++localDepth] = youngestChild[tosOrdinal]; + ordinalStack[++localDepth] = children[tosOrdinal]; siblingExplored[localDepth] = Integer.MAX_VALUE; } // endof loop while stack is not empty Index: lucene/facet/src/java/org/apache/lucene/facet/taxonomy/ChildrenArrays.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/taxonomy/ChildrenArrays.java (revision 1417812) +++ lucene/facet/src/java/org/apache/lucene/facet/taxonomy/ChildrenArrays.java (working copy) @@ -1,87 +0,0 @@ -package org.apache.lucene.facet.taxonomy; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * Equivalent representations of the taxonomy's parent info, - * used internally for efficient computation of facet results: - * "youngest child" and "oldest sibling" - */ -public class ChildrenArrays { - - private final int[] youngestChild, olderSibling; - - public ChildrenArrays(int[] parents) { - this(parents, null); - } - - public ChildrenArrays(int[] parents, ChildrenArrays copyFrom) { - youngestChild = new int[parents.length]; - olderSibling = new int[parents.length]; - int first = 0; - if (copyFrom != null) { - System.arraycopy(copyFrom.getYoungestChildArray(), 0, youngestChild, 0, copyFrom.getYoungestChildArray().length); - System.arraycopy(copyFrom.getOlderSiblingArray(), 0, olderSibling, 0, copyFrom.getOlderSiblingArray().length); - first = copyFrom.getOlderSiblingArray().length; - } - computeArrays(parents, first); - } - - private void computeArrays(int[] parents, int first) { - // reset the youngest child of all ordinals. while this should be done only - // for the leaves, we don't know up front which are the leaves, so we reset - // all of them. - for (int i = first; i < parents.length; i++) { - youngestChild[i] = TaxonomyReader.INVALID_ORDINAL; - } - - // the root category has no parent, and therefore no siblings - if (first == 0) { - first = 1; - olderSibling[0] = TaxonomyReader.INVALID_ORDINAL; - } - - for (int i = first; i < parents.length; i++) { - // note that parents[i] is always < i, so the right-hand-side of - // the following line is already set when we get here - olderSibling[i] = youngestChild[parents[i]]; - youngestChild[parents[i]] = i; - } - } - - /** - * Returns an {@code int[]} the size of the taxonomy listing for each category - * the ordinal of its immediate older sibling (the sibling in the taxonomy - * tree with the highest ordinal below that of the given ordinal). The value - * for a category with no older sibling is {@link TaxonomyReader#INVALID_ORDINAL}. - */ - public int[] getOlderSiblingArray() { - return olderSibling; - } - - /** - * Returns an {@code int[]} the size of the taxonomy listing the ordinal of - * the youngest (highest numbered) child category of each category in the - * taxonomy. The value for a leaf category (a category without children) is - * {@link TaxonomyReader#INVALID_ORDINAL}. - */ - public int[] getYoungestChildArray() { - return youngestChild; - } - -} Index: lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyReader.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyReader.java (revision 1417812) +++ lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyReader.java (working copy) @@ -5,6 +5,7 @@ import java.util.Map; import java.util.concurrent.atomic.AtomicInteger; +import org.apache.lucene.facet.taxonomy.directory.ParallelTaxonomyArrays; import org.apache.lucene.store.AlreadyClosedException; /* @@ -162,7 +163,7 @@ } /** - * Returns a {@link ChildrenArrays} object which can be used together to + * Returns a {@link ParallelTaxonomyArrays} object which can be used to * efficiently enumerate the children of any category. *

* The caller can hold on to the object it got indefinitely - it is guaranteed @@ -171,7 +172,7 @@ * as read-only and not modify it, because other callers might have * gotten the same object too. */ - public abstract ChildrenArrays getChildrenArrays() throws IOException; + public abstract ParallelTaxonomyArrays getParallelTaxonomyArrays() throws IOException; /** * Retrieve user committed data. @@ -195,7 +196,6 @@ * Returns the ordinal of the parent category of the category with the given * ordinal, according to the following rules: * - * *