Index: lucene/CHANGES.txt =================================================================== --- lucene/CHANGES.txt (revision 1429478) +++ lucene/CHANGES.txt (working copy) @@ -103,6 +103,8 @@ implementations. NOTE: indexes that contain category enhancements/associations are not supported by the new code and should be recreated. (Shai Erera) + +* LUCENE-4659: Massive cleanup to CategoryPath API. (Shai Erera) New Features Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/RandomFacetSource.java =================================================================== --- lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/RandomFacetSource.java (revision 1429478) +++ lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/RandomFacetSource.java (working copy) @@ -53,13 +53,14 @@ facets.clear(); } int numFacets = 1 + random.nextInt(maxDocFacets-1); // at least one facet to each doc - for (int i=0; i 0); + current = current.subpath(current.length - 1); + } while (!pathPolicy.shouldAdd(current) && current.length > 0); isParent = true; return true; } @@ -82,7 +82,7 @@ @Override public void reset() throws IOException { current = categories.next(); - termAttribute.resizeBuffer(current.charsNeededForFullPath()); + termAttribute.resizeBuffer(current.fullPathLength()); isParent = false; } Index: lucene/facet/src/java/org/apache/lucene/facet/index/FacetFields.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/index/FacetFields.java (revision 1429478) +++ lucene/facet/src/java/org/apache/lucene/facet/index/FacetFields.java (working copy) @@ -142,11 +142,7 @@ list = new ArrayList(); categoryLists.put(clp, list); } - // DrillDownStream modifies the CategoryPath by calling trim(). That means - // that the source category, as the app ses it, is modified. While for - // most apps this is not a problem, we need to protect against it. If - // CategoryPath will be made immutable, we can stop cloning. - list.add(cp.clone()); + list.add(cp); } return categoryLists; } Index: lucene/facet/src/java/org/apache/lucene/facet/index/categorypolicy/NonTopLevelPathPolicy.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/index/categorypolicy/NonTopLevelPathPolicy.java (revision 1429478) +++ lucene/facet/src/java/org/apache/lucene/facet/index/categorypolicy/NonTopLevelPathPolicy.java (working copy) @@ -39,6 +39,6 @@ */ @Override public boolean shouldAdd(CategoryPath categoryPath) { - return categoryPath.length() >= DEFAULT_MINIMAL_SUBPATH_LENGTH; + return categoryPath.length >= DEFAULT_MINIMAL_SUBPATH_LENGTH; } } Index: lucene/facet/src/java/org/apache/lucene/facet/index/categorypolicy/PathPolicy.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/index/categorypolicy/PathPolicy.java (revision 1429478) +++ lucene/facet/src/java/org/apache/lucene/facet/index/categorypolicy/PathPolicy.java (working copy) @@ -33,12 +33,12 @@ /** * A {@link PathPolicy} which adds all {@link CategoryPath} that have at least - * one component (i.e. {@link CategoryPath#length()} > 0) to the categories + * one component (i.e. {@link CategoryPath#length} > 0) to the categories * stream. */ public static final PathPolicy ALL_CATEGORIES = new PathPolicy() { @Override - public boolean shouldAdd(CategoryPath categoryPath) { return categoryPath.length() > 0; } + public boolean shouldAdd(CategoryPath categoryPath) { return categoryPath.length > 0; } }; /** Index: lucene/facet/src/java/org/apache/lucene/facet/index/params/FacetIndexingParams.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/index/params/FacetIndexingParams.java (revision 1429478) +++ lucene/facet/src/java/org/apache/lucene/facet/index/params/FacetIndexingParams.java (working copy) @@ -54,11 +54,11 @@ public static final FacetIndexingParams ALL_PARENTS = new FacetIndexingParams(); /** - * The default delimiter with which {@link CategoryPath#getComponent(int) - * components} are concatenated when written to the index, e.g. as drill-down - * terms. If you choose to override it by overiding - * {@link #getFacetDelimChar()}, you should make sure that you return a - * character that's not found in any path component. + * The default delimiter with which {@link CategoryPath#components} are + * concatenated when written to the index, e.g. as drill-down terms. If you + * choose to override it by overiding {@link #getFacetDelimChar()}, you should + * make sure that you return a character that's not found in any path + * component. */ public static final char DEFAULT_FACET_DELIM_CHAR = '\uF749'; @@ -108,10 +108,10 @@ * that were written. *

* NOTE: You should make sure that the {@code char[]} is large enough, - * by e.g. calling {@link CategoryPath#charsNeededForFullPath()}. + * by e.g. calling {@link CategoryPath#fullPathLength()}. */ public int drillDownTermText(CategoryPath path, char[] buffer) { - return path.copyToCharArray(buffer, 0, -1, getFacetDelimChar()); + return path.copyFullPath(buffer, 0, getFacetDelimChar()); } /** Index: lucene/facet/src/java/org/apache/lucene/facet/index/params/PerDimensionIndexingParams.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/index/params/PerDimensionIndexingParams.java (revision 1429478) +++ lucene/facet/src/java/org/apache/lucene/facet/index/params/PerDimensionIndexingParams.java (working copy) @@ -43,7 +43,7 @@ /** * Initializes a new instance with the given dimension-to-params mapping. The * dimension is considered as what's returned by - * {@link CategoryPath#getComponent(int) cp.getComponent(0)}. + * {@link CategoryPath#components cp.components[0]}. * *

* NOTE: for any dimension whose {@link CategoryListParams} is not @@ -65,7 +65,7 @@ super(categoryListParams); clParamsMap = new HashMap(); for (Entry e : paramsMap.entrySet()) { - clParamsMap.put(e.getKey().getComponent(0), e.getValue()); + clParamsMap.put(e.getKey().components[0], e.getValue()); } } @@ -83,7 +83,7 @@ @Override public CategoryListParams getCategoryListParams(CategoryPath category) { if (category != null) { - CategoryListParams clParams = clParamsMap.get(category.getComponent(0)); + CategoryListParams clParams = clParamsMap.get(category.components[0]); if (clParams != null) { return clParams; } Index: lucene/facet/src/java/org/apache/lucene/facet/search/DrillDown.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/DrillDown.java (revision 1429478) +++ lucene/facet/src/java/org/apache/lucene/facet/search/DrillDown.java (working copy) @@ -53,7 +53,7 @@ /** Return a drill-down {@link Term} for a category. */ public static final Term term(FacetIndexingParams iParams, CategoryPath path) { CategoryListParams clp = iParams.getCategoryListParams(path); - char[] buffer = new char[path.charsNeededForFullPath()]; + char[] buffer = new char[path.fullPathLength()]; iParams.drillDownTermText(path, buffer); return new Term(clp.getTerm().field(), String.valueOf(buffer)); } Index: lucene/facet/src/java/org/apache/lucene/facet/search/TotalFacetCounts.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/TotalFacetCounts.java (revision 1429478) +++ lucene/facet/src/java/org/apache/lucene/facet/search/TotalFacetCounts.java (working copy) @@ -153,7 +153,7 @@ // needed because FacetSearchParams do not allow empty FacetRequests private static final List DUMMY_REQ = Arrays.asList( - new FacetRequest[] { new CountFacetRequest(new CategoryPath(), 1) }); + new FacetRequest[] { new CountFacetRequest(CategoryPath.EMPTY, 1) }); static TotalFacetCounts compute(final IndexReader indexReader, final TaxonomyReader taxonomy, final FacetIndexingParams facetIndexingParams, Index: lucene/facet/src/java/org/apache/lucene/facet/taxonomy/CategoryPath.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/taxonomy/CategoryPath.java (revision 1429478) +++ lucene/facet/src/java/org/apache/lucene/facet/taxonomy/CategoryPath.java (working copy) @@ -1,9 +1,5 @@ package org.apache.lucene.facet.taxonomy; -import java.io.IOException; -import java.io.InputStreamReader; -import java.io.OutputStreamWriter; -import java.io.Serializable; /* * Licensed to the Apache Software Foundation (ASF) under one or more @@ -23,1032 +19,196 @@ */ /** - * A CategoryPath holds a sequence of string components, specifying the - * hierarchical name of a category. - *

- * CategoryPath is designed to reduce the number of object allocations, in two - * ways: First, it keeps the components internally in two arrays, rather than - * keeping individual strings. Second, it allows reusing the same CategoryPath - * object (which can be clear()ed and new components add()ed again) and of - * add()'s parameter (which can be a reusable object, not just a string). + * Holds a sequence of string components, specifying the hierarchical name of a + * category. * * @lucene.experimental */ -public class CategoryPath implements Serializable, Cloneable, Comparable { +public class CategoryPath implements Comparable { - // A category path is a sequence of string components. It is kept - // internally as one large character array "chars" with all the string - // concatenated (without separators), and an array of integers "ends" - // pointing to the/ end of each component. Both arrays may be larger - // than actually in use. An additional integer, "ncomponents" specifies - // how many components are actually set. - // We use shorts instead of ints for "ends" to save a bit of space. This - // means that our path lengths are limited to 32767 characters - which - // should not be a problem in any realistic scenario. - protected char[] chars; - protected short[] ends; - protected short ncomponents; + /** An empty {@link CategoryPath} */ + public static final CategoryPath EMPTY = new CategoryPath(); - /** - * Return the number of components in the facet path. Note that this is - * not the number of characters, but the number of components. - */ - public short length() { - return ncomponents; - } + /** The components of this {@link CategoryPath}. */ + public final String[] components; - /** - * Trim the last components from the path. - * - * @param nTrim - * Number of components to trim. If larger than the number of - * components this path has, the entire path will be cleared. - */ - public void trim(int nTrim) { - if (nTrim >= this.ncomponents) { - clear(); - } else if (nTrim > 0) { - this.ncomponents -= nTrim; - } - } + /** The number of components of this {@link CategoryPath}. */ + public final int length; - /** - * Returns the current character capacity of the CategoryPath. The character - * capacity is the size of the internal buffer used to hold the characters - * of all the path's components. When a component is added and the capacity - * is not big enough, the buffer is automatically grown, and capacityChars() - * increases. - */ - public int capacityChars() { - return chars.length; + // Used by singleton EMPTY + private CategoryPath() { + components = new String[0]; + length = 0; } - /** - * Returns the current component capacity of the CategoryPath. The component - * capacity is the maximum number of components that the internal buffer can - * currently hold. When a component is added beyond this capacity, the - * buffer is automatically grown, and capacityComponents() increases. - */ - public int capacityComponents() { - return ends.length; + // Used by subpath + private CategoryPath(CategoryPath copyFrom, int prefixLen) { + this.components = copyFrom.components; + length = prefixLen; } - - /** - * Construct a new empty CategoryPath object. CategoryPath objects are meant - * to be reused, by add()ing components, and later clear()ing, and add()ing - * components again. The CategoryPath object is created with a buffer - * pre-allocated for a given number of characters and components, but the - * buffer will grow as necessary (see {@link #capacityChars()} and - * {@link #capacityComponents()}). - */ - public CategoryPath(int capacityChars, int capacityComponents) { - ncomponents = 0; - chars = new char[capacityChars]; - ends = new short[capacityComponents]; + + /** Construct from the given path components. */ + public CategoryPath(String... components) { + this.components = components; + length = components.length; } - /** - * Create an empty CategoryPath object. Equivalent to the constructor - * {@link #CategoryPath(int, int)} with the two initial-capacity arguments - * set to zero. - */ - public CategoryPath() { - this(0, 0); - } - - /** - * Add the given component to the end of the path. - *

- * Note that when a String object is passed to this method, a reference to - * it is not saved (rather, its content is copied), which will lead to that - * String object being gc'ed. To reduce the number of garbage objects, you - * can pass a mutable CharBuffer instead of an immutable String to this - * method. - */ - public void add(CharSequence component) { - // Set the new end, increasing the "ends" array sizes if necessary: - if (ncomponents >= ends.length) { - short[] newends = new short[(ends.length + 1) * 2]; - System.arraycopy(ends, 0, newends, 0, ends.length); - ends = newends; + /** Construct from a given path, separating path components with {@code delimiter}. */ + public CategoryPath(String pathString, char delimiter) { + String[] comps = pathString.split(Character.toString(delimiter)); + if (comps.length == 1 && comps[0].isEmpty()) { + components = EMPTY.components; + length = 0; + } else { + components = comps; + length = components.length; } - short prevend = (ncomponents == 0) ? 0 : ends[ncomponents - 1]; - int cmplen = component.length(); - ends[ncomponents] = (short) (prevend + cmplen); - - // Copy the new component's characters, increasing the "chars" array - // sizes if necessary: - if (ends[ncomponents] > chars.length) { - char[] newchars = new char[ends[ncomponents] * 2]; - System.arraycopy(chars, 0, newchars, 0, chars.length); - chars = newchars; - } - for (int i = 0; i < cmplen; i++) { - chars[prevend++] = component.charAt(i); - } - - ncomponents++; } /** - * Empty the CategoryPath object, so that it has zero components. The - * capacity of the object (see {@link #capacityChars()} and - * {@link #capacityComponents()}) is not reduced, so that the object can be - * reused without frequent reallocations. + * Returns the number of characters needed to represent the path, including + * delimiter characters, for using with + * {@link #copyFullPath(char[], int, char)}. */ - public void clear() { - ncomponents = 0; - } - - /** - * Build a string representation of the path, with its components separated - * by the given delimiter character. The resulting string is appended to a - * given Appendable, e.g., a StringBuilder, CharBuffer or Writer. - *

- * Note that the two cases of zero components and one component with zero - * length produce indistinguishable results (both of them append nothing). - * This is normally not a problem, because components should not normally - * have zero lengths. - *

- * An IOException can be thrown if the given Appendable's append() throws - * this exception. - */ - public void appendTo(Appendable out, char delimiter) throws IOException { - if (ncomponents == 0) { - return; // just append nothing... + public int fullPathLength() { + if (length == 0) return 0; + + int charsNeeded = 0; + for (int i = 0; i < length; i++) { + charsNeeded += components[i].length(); } - for (int i = 0; i < ends[0]; i++) { - out.append(chars[i]); - } - for (int j = 1; j < ncomponents; j++) { - out.append(delimiter); - for (int i = ends[j - 1]; i < ends[j]; i++) { - out.append(chars[i]); - } - } + charsNeeded += length - 1; // num delimter chars + return charsNeeded; } /** - * like {@link #appendTo(Appendable, char)}, but takes only a prefix of the - * path, rather than the whole path. - *

- * If the given prefix length is negative or bigger than the path's actual - * length, the whole path is taken. + * Compares this path with another {@link CategoryPath} for lexicographic + * order. */ - public void appendTo(Appendable out, char delimiter, int prefixLen) - throws IOException { - if (prefixLen < 0 || prefixLen > ncomponents) { - prefixLen = ncomponents; - } - if (prefixLen == 0) { - return; // just append nothing... - } - for (int i = 0; i < ends[0]; i++) { - out.append(chars[i]); - } - for (int j = 1; j < prefixLen; j++) { - out.append(delimiter); - for (int i = ends[j - 1]; i < ends[j]; i++) { - out.append(chars[i]); - } - } - } - - /** - * like {@link #appendTo(Appendable, char)}, but takes only a part of the - * path, rather than the whole path. - *

- * start specifies the first component in the subpath, and - * end is one past the last component. If start is - * negative, 0 is assumed, and if end is negative or past the - * end of the path, the path is taken until the end. Otherwise, if - * end<=start, nothing is appended. Nothing is appended also in - * the case that the path is empty. - */ - public void appendTo(Appendable out, char delimiter, int start, int end) - throws IOException { - if (start < 0) { - start = 0; - } - if (end < 0 || end > ncomponents) { - end = ncomponents; - } - if (end <= start) { - return; // just append nothing... - } - for (int i = (start == 0 ? 0 : ends[start - 1]); i < ends[start]; i++) { - out.append(chars[i]); - } - for (int j = start + 1; j < end; j++) { - out.append(delimiter); - for (int i = ends[j - 1]; i < ends[j]; i++) { - out.append(chars[i]); - } - } - } - - /** - * Build a string representation of the path, with its components separated - * by the given delimiter character. The resulting string is returned as a - * new String object. To avoid this temporary object creation, consider - * using {@link #appendTo(Appendable, char)} instead. - *

- * Note that the two cases of zero components and one component with zero - * length produce indistinguishable results (both of them return an empty - * string). This is normally not a problem, because components should not - * normally have zero lengths. - */ - public String toString(char delimiter) { - if (ncomponents == 0) { - return ""; - } - StringBuilder sb = new StringBuilder(ends[ncomponents - 1] - + (ncomponents - 1)); - try { - this.appendTo(sb, delimiter); - } catch (IOException e) { - // can't happen, because StringBuilder.append() never actually - // throws an exception! - } - return sb.toString(); - } - - /** - * This method, an implementation of the {@link Object#toString()} - * interface, is to allow simple printing of a CategoryPath, for debugging - * purposes. When possible, it recommended to avoid using it it, and rather, - * if you want to output the path with its components separated by a - * delimiter character, specify the delimiter explicitly, with - * {@link #toString(char)}. - */ @Override - public String toString() { - return toString('/'); - } - - /** - * like {@link #toString(char)}, but takes only a prefix with a given number - * of components, rather than the whole path. - *

- * If the given length is negative or bigger than the path's actual length, - * the whole path is taken. - */ - public String toString(char delimiter, int prefixLen) { - if (prefixLen < 0 || prefixLen > ncomponents) { - prefixLen = ncomponents; + public int compareTo(CategoryPath other) { + int length = this.length < other.length ? this.length : other.length; + for (int i = 0, j = 0; i < length; i++, j++) { + int cmp = components[i].compareTo(other.components[j]); + if (cmp < 0) return -1; // this is 'before' + if (cmp > 0) return 1; // this is 'after' } - if (prefixLen == 0) { - return ""; - } - StringBuilder sb = new StringBuilder(ends[prefixLen - 1] - + (prefixLen - 1)); - try { - this.appendTo(sb, delimiter, prefixLen); - } catch (IOException e) { - // can't happen, because sb.append() never actually throws an - // exception - } - return sb.toString(); + + // one is a prefix of the other + return length - other.length; } /** - * like {@link #toString(char)}, but takes only a part of the path, rather - * than the whole path. - *

- * start specifies the first component in the subpath, and - * end is one past the last component. If start is - * negative, 0 is assumed, and if end is negative or past the - * end of the path, the path is taken until the end. Otherwise, if - * end<=start, an empty string is returned. An emptry string is - * returned also in the case that the path is empty. - */ - public String toString(char delimiter, int start, int end) { - if (start < 0) { - start = 0; - } - if (end < 0 || end > ncomponents) { - end = ncomponents; - } - if (end <= start) { - return ""; - } - int startchar = (start == 0) ? 0 : ends[start - 1]; - StringBuilder sb = new StringBuilder(ends[end - 1] - startchar - + (end - start) - 1); - try { - this.appendTo(sb, delimiter, start, end); - } catch (IOException e) { - // can't happen, because sb.append() never actually throws an - // exception - } - return sb.toString(); - } - - /** - * Return the i'th component of the path, in a new String object. If there - * is no i'th component, a null is returned. - */ - public String getComponent(int i) { - if (i < 0 || i >= ncomponents) { - return null; - } - if (i == 0) { - return new String(chars, 0, ends[0]); - } - return new String(chars, ends[i - 1], ends[i] - ends[i - 1]); - } - - /** - * Return the last component of the path, in a new String object. If the - * path is empty, a null is returned. - */ - public String lastComponent() { - if (ncomponents == 0) { - return null; - } - if (ncomponents == 1) { - return new String(chars, 0, ends[0]); - } - return new String(chars, ends[ncomponents - 2], ends[ncomponents - 1] - - ends[ncomponents - 2]); - } - - /** - * Copies the specified number of components from this category path to the - * specified character array, with the components separated by a given - * delimiter character. The array must be large enough to hold the + * Copies the path components to the given {@code char[]}, starting at index + * {@code start}. {@code delimiter} is copied between the path components. + * Returns the number of chars copied. + * + *

+ * NOTE: this method relies on the array being large enough to hold the * components and separators - the amount of needed space can be calculated - * with {@link #charsNeededForFullPath()}. - *

- * This method returns the number of characters written to the array. - * - * @param outputBuffer - * The destination character array. - * @param outputBufferStart - * The first location to write in the output array. - * @param numberOfComponentsToCopy - * The number of path components to write to the destination - * buffer. - * @param separatorChar - * The separator inserted between every pair of path components - * in the output buffer. - * @see #charsNeededForFullPath() + * with {@link #fullPathLength()}. */ - public int copyToCharArray(char[] outputBuffer, int outputBufferStart, - int numberOfComponentsToCopy, char separatorChar) { - if (numberOfComponentsToCopy == 0) { + public int copyFullPath(char[] buf, int start, char delimiter) { + if (length == 0) { return 0; } - if (numberOfComponentsToCopy < 0 - || numberOfComponentsToCopy > ncomponents) { - numberOfComponentsToCopy = ncomponents; - } - int outputBufferInitialStart = outputBufferStart; // for calculating - // chars copied. - int sourceStart = 0; - int sourceLength = ends[0]; - for (int component = 0; component < numberOfComponentsToCopy; component++) { - if (component > 0) { - sourceStart = ends[component - 1]; - sourceLength = ends[component] - sourceStart; - outputBuffer[outputBufferStart++] = separatorChar; - } - System.arraycopy(chars, sourceStart, outputBuffer, - outputBufferStart, sourceLength); - outputBufferStart += sourceLength; - } - return outputBufferStart - outputBufferInitialStart; - } - /** - * Returns the number of characters required to represent this entire - * category path, if written using - * {@link #copyToCharArray(char[], int, int, char)} or - * {@link #appendTo(Appendable, char)}. This includes the number of - * characters in all the components, plus the number of separators between - * them (each one character in the aforementioned methods). - */ - public int charsNeededForFullPath() { - if (ncomponents == 0) { - return 0; + int numCharsCopied = 0; + int idx = start; + int upto = length - 1; + for (int i = 0; i < upto; i++) { + int len = components[i].length(); + components[i].getChars(0, len, buf, idx); + idx += len; + buf[idx++] = delimiter; + numCharsCopied += len + 1; } - return ends[ncomponents - 1] + ncomponents - 1; + components[upto].getChars(0, components[upto].length(), buf, idx); + numCharsCopied += components[upto].length(); + + return numCharsCopied; } - /** - * Construct a new CategoryPath object, given a single string with - * components separated by a given delimiter character. - *

- * The initial capacity of the constructed object will be exactly what is - * needed to hold the given path. This fact is convenient when creating a - * temporary object that will not be reused later. - */ - public CategoryPath(String pathString, char delimiter) { - if (pathString.length() == 0) { - ncomponents = 0; - chars = new char[0]; - ends = new short[0]; - return; - } - - // This constructor is often used for creating a temporary object - // (one which will not be reused to hold multiple paths), so we want - // to do our best to allocate exactly the needed size - not less (to - // avoid reallocation) and not more (so as not to waste space). - // To do this, we unfortunately need to make an additional pass on the - // given string: - int nparts = 1; - for (int i = pathString.indexOf(delimiter); i >= 0; i = pathString - .indexOf(delimiter, i + 1)) { - nparts++; - } - - ends = new short[nparts]; - chars = new char[pathString.length() - nparts + 1]; - ncomponents = 0; - - add(pathString, delimiter); - } - - /** - * Add the given components to the end of the path. The components are given - * in a single string, separated by a given delimiter character. If the - * given string is empty, it is assumed to refer to the root (empty) - * category, and nothing is added to the path (rather than adding a single - * empty component). - *

- * Note that when a String object is passed to this method, a reference to - * it is not saved (rather, its content is copied), which will lead to that - * String object being gc'ed. To reduce the number of garbage objects, you - * can pass a mutable CharBuffer instead of an immutable String to this - * method. - */ - public void add(CharSequence pathString, char delimiter) { - int len = pathString.length(); - if (len == 0) { - return; // assume root category meant, so add nothing. - } - short pos = (ncomponents == 0) ? 0 : ends[ncomponents - 1]; - for (int i = 0; i < len; i++) { - char c = pathString.charAt(i); - if (c == delimiter) { - if (ncomponents >= ends.length) { - short[] newends = new short[(ends.length + 1) * 2]; - System.arraycopy(ends, 0, newends, 0, ends.length); - ends = newends; - } - ends[ncomponents++] = pos; - } else { - if (pos >= chars.length) { - char[] newchars = new char[(chars.length + 1) * 2]; - System.arraycopy(chars, 0, newchars, 0, chars.length); - chars = newchars; - } - chars[pos++] = c; - } - } - - // Don't forget to count the last component! - if (ncomponents >= ends.length) { - short[] newends = new short[(ends.length + 1) * 2]; - System.arraycopy(ends, 0, newends, 0, ends.length); - ends = newends; - } - ends[ncomponents++] = pos; - } - - /** - * Construct a new CategoryPath object, copying an existing path given as an - * array of strings. - *

- * The new object occupies exactly the space it needs, without any spare - * capacity. This is the expected behavior in the typical use case, where - * this constructor is used to create a temporary object which is never - * reused. - */ - public CategoryPath(CharSequence... components) { - this.ncomponents = (short) components.length; - this.ends = new short[ncomponents]; - if (ncomponents > 0) { - this.ends[0] = (short) components[0].length(); - for (int i = 1; i < ncomponents; i++) { - this.ends[i] = (short) (this.ends[i - 1] + components[i] - .length()); - } - this.chars = new char[this.ends[ncomponents - 1]]; - CharSequence cs = components[0]; - if (cs instanceof String) { - ((String) cs).getChars(0, cs.length(), this.chars, 0); - } else { - for (int j = 0, k = cs.length(); j < k; j++) { - this.chars[j] = cs.charAt(j); - } - } - for (int i = 1; i < ncomponents; i++) { - cs = components[i]; - int offset = this.ends[i - 1]; - if (cs instanceof String) { - ((String) cs).getChars(0, cs.length(), this.chars, offset); - } else { - for (int j = 0, k = cs.length(); j < k; j++) { - this.chars[j + offset] = cs.charAt(j); - } - } - } - } else { - this.chars = new char[0]; - } - } - - /** - * Construct a new CategoryPath object, copying the path given in an - * existing CategoryPath object. - *

- * This copy-constructor is handy when you need to save a reference to a - * CategoryPath (e.g., when it serves as a key to a hash-table), but cannot - * save a reference to the original object because its contents can be - * changed later by the user. Copying the contents into a new object is a - * solution. - *

- * This constructor does not copy the capacity (spare buffer size) - * of the existing CategoryPath. Rather, the new object occupies exactly the - * space it needs, without any spare. This is the expected behavior in the - * typical use case outlined in the previous paragraph. - */ - public CategoryPath(CategoryPath existing) { - ncomponents = existing.ncomponents; - if (ncomponents == 0) { - chars = new char[0]; - ends = new short[0]; - return; - } - - chars = new char[existing.ends[ncomponents - 1]]; - System.arraycopy(existing.chars, 0, chars, 0, chars.length); - ends = new short[ncomponents]; - System.arraycopy(existing.ends, 0, ends, 0, ends.length); - } - - /** - * Construct a new CategoryPath object, copying a prefix with the given - * number of components of the path given in an existing CategoryPath - * object. - *

- * If the given length is negative or bigger than the given path's actual - * length, the full path is taken. - *

- * This constructor is often convenient for creating a temporary object with - * a path's prefix, but this practice is wasteful, and therefore - * inadvisable. Rather, the application should be written in a way that - * allows considering only a prefix of a given path, without needing to make - * a copy of that path. - */ - public CategoryPath(CategoryPath existing, int prefixLen) { - if (prefixLen < 0 || prefixLen > existing.ncomponents) { - ncomponents = existing.ncomponents; - } else { - ncomponents = (short) prefixLen; - } - if (ncomponents == 0) { - chars = new char[0]; - ends = new short[0]; - return; - } - - chars = new char[existing.ends[ncomponents - 1]]; - System.arraycopy(existing.chars, 0, chars, 0, chars.length); - ends = new short[ncomponents]; - System.arraycopy(existing.ends, 0, ends, 0, ends.length); - } - @Override - public CategoryPath clone() { - return new CategoryPath(this); - } - - /** - * Compare the given CategoryPath to another one. For two category paths to - * be considered equal, only the path they contain needs to be identical The - * unused capacity of the objects is not considered in the comparison. - */ - @Override public boolean equals(Object obj) { - if (obj instanceof CategoryPath) { - CategoryPath other = (CategoryPath) obj; - if (other.ncomponents != this.ncomponents) { - return false; - } - // Unfortunately, Arrays.equal() can only compare entire arrays, - // and in our case we potentially have unused parts of the arrays - // that must not be compared... I wish that some future version - // of Java has a offset and length parameter to Arrays.equal - // (sort of like System.arraycopy()). - if (ncomponents == 0) { - return true; // nothing to compare... - } - for (int i = 0; i < ncomponents; i++) { - if (this.ends[i] != other.ends[i]) { - return false; - } - } - int len = ends[ncomponents - 1]; - for (int i = 0; i < len; i++) { - if (this.chars[i] != other.chars[i]) { - return false; - } - } - return true; - } - return false; - } - - /** - * Test whether this object is a descendant of another CategoryPath. This is - * true if the other CategoryPath is the prefix of this. - */ - public boolean isDescendantOf(CategoryPath other) { - if (this.ncomponents < other.ncomponents) { + if (!(obj instanceof CategoryPath)) { return false; } - int j = 0; - for (int i = 0; i < other.ncomponents; i++) { - if (ends[i] != other.ends[i]) { + + CategoryPath other = (CategoryPath) obj; + if (length != other.length) { + return false; // not same length, cannot be equal + } + + for (int i = 0, j = 0; i < length; i++, j++) { + if (!components[i].equals(other.components[j])) { return false; } - for (; j < ends[i]; j++) { - if (this.chars[j] != other.chars[j]) { - return false; - } - } } return true; } - /** - * Calculate a hashCode for this path, used when a CategoryPath serves as a - * hash-table key. If two objects are equal(), their hashCodes need to be - * equal, so like in equal(), hashCode does not consider unused portions of - * the internal buffers in its calculation. - *

- * The hash function used is modeled after Java's String.hashCode() - a - * simple multiplicative hash function with the multiplier 31. The same hash - * function also appeared in Kernighan & Ritchie's second edition of - * "The C Programming Language" (1988). - */ @Override public int hashCode() { - if (ncomponents == 0) { + if (length == 0) { return 0; } - int hash = ncomponents; - // Unfortunately, Arrays.hashCode() can only calculate a hash code - // for an entire arrays, and in our case we potentially have unused - // parts of the arrays that must be ignored, so must use our own loop - // over the characters. I wish that some future version of Java will - // add offset and length parameters to Arrays.hashCode (sort of like - // System.arraycopy()'s parameters). - for (int i = 0; i < ncomponents; i++) { - hash = hash * 31 + ends[i]; + + int hash = length; + for (int i = 0; i < length; i++) { + hash = hash * 31 + components[i].hashCode(); } - int len = ends[ncomponents - 1]; - for (int i = 0; i < len; i++) { - hash = hash * 31 + chars[i]; - } return hash; } - /** - * Like {@link #hashCode()}, but find the hash function of a prefix with the - * given number of components, rather than of the entire path. - */ - public int hashCode(int prefixLen) { - if (prefixLen < 0 || prefixLen > ncomponents) { - prefixLen = ncomponents; - } - if (prefixLen == 0) { - return 0; - } - int hash = prefixLen; - for (int i = 0; i < prefixLen; i++) { - hash = hash * 31 + ends[i]; - } - int len = ends[prefixLen - 1]; - for (int i = 0; i < len; i++) { - hash = hash * 31 + chars[i]; - } - return hash; - } - - /** - * Calculate a 64-bit hash function for this path. Unlike - * {@link #hashCode()}, this method is not part of the Java standard, and is - * only used if explicitly called by the user. - *

- * If two objects are equal(), their hash codes need to be equal, so like in - * {@link #equals(Object)}, longHashCode does not consider unused portions - * of the internal buffers in its calculation. - *

- * The hash function used is a simple multiplicative hash function, with the - * multiplier 65599. While Java's standard multiplier 31 (used in - * {@link #hashCode()}) gives a good distribution for ASCII strings, it - * turns out that for foreign-language strings (with 16-bit characters) it - * gives too many collisions, and a bigger multiplier produces fewer - * collisions in this case. - */ + /** Calculate a 64-bit hash function for this path. */ public long longHashCode() { - if (ncomponents == 0) { + if (length == 0) { return 0; } - long hash = ncomponents; - for (int i = 0; i < ncomponents; i++) { - hash = hash * 65599 + ends[i]; + + long hash = length; + for (int i = 0; i < length; i++) { + hash = hash * 65599 + components[i].hashCode(); } - int len = ends[ncomponents - 1]; - for (int i = 0; i < len; i++) { - hash = hash * 65599 + chars[i]; - } return hash; } - /** - * Like {@link #longHashCode()}, but find the hash function of a prefix with - * the given number of components, rather than of the entire path. - */ - public long longHashCode(int prefixLen) { - if (prefixLen < 0 || prefixLen > ncomponents) { - prefixLen = ncomponents; + /** Returns a sub-path of this path up to {@code length} components. */ + public CategoryPath subpath(int length) { + if (length >= this.length || length < 0) { + return this; + } else if (length == 0) { + return EMPTY; + } else { + return new CategoryPath(this, length); } - if (prefixLen == 0) { - return 0; - } - long hash = prefixLen; - for (int i = 0; i < prefixLen; i++) { - hash = hash * 65599 + ends[i]; - } - int len = ends[prefixLen - 1]; - for (int i = 0; i < len; i++) { - hash = hash * 65599 + chars[i]; - } - return hash; } /** - * Write out a serialized (as a character sequence) representation of the - * path to a given Appendable (e.g., a StringBuilder, CharBuffer, Writer, or - * something similar. - *

- * This method may throw a IOException if the given Appendable threw this - * exception while appending. - */ - public void serializeAppendTo(Appendable out) throws IOException { - // Note that we use the fact that ncomponents and ends[] are shorts, - // so we can write them as chars: - out.append((char) ncomponents); - if (ncomponents == 0) { - return; - } - for (int i = 0; i < ncomponents; i++) { - out.append((char) ends[i]); - } - int usedchars = ends[ncomponents - 1]; - for (int i = 0; i < usedchars; i++) { - out.append(chars[i]); - } - } - - /** - * Just like {@link #serializeAppendTo(Appendable)}, but writes only a - * prefix of the CategoryPath. - */ - public void serializeAppendTo(int prefixLen, Appendable out) - throws IOException { - if (prefixLen < 0 || prefixLen > ncomponents) { - prefixLen = ncomponents; - } - // Note that we use the fact that ncomponents and ends[] are shorts, - // so we can write them as chars: - out.append((char) prefixLen); - if (prefixLen == 0) { - return; - } - for (int i = 0; i < prefixLen; i++) { - out.append((char) ends[i]); - } - int usedchars = ends[prefixLen - 1]; - for (int i = 0; i < usedchars; i++) { - out.append(chars[i]); - } - } - - /** - * Set a CategoryPath from a character-sequence representation written by - * {@link #serializeAppendTo(Appendable)}. - *

- * Reading starts at the given offset into the given character sequence, and - * the offset right after the end of this path is returned. - */ - public int setFromSerialized(CharSequence buffer, int offset) { - ncomponents = (short) buffer.charAt(offset++); - if (ncomponents == 0) { - return offset; - } - - if (ncomponents >= ends.length) { - ends = new short[Math.max(ends.length * 2, ncomponents)]; - } - for (int i = 0; i < ncomponents; i++) { - ends[i] = (short) buffer.charAt(offset++); - } - - int usedchars = ends[ncomponents - 1]; - if (usedchars > chars.length) { - chars = new char[Math.max(chars.length * 2, usedchars)]; - } - for (int i = 0; i < usedchars; i++) { - chars[i] = buffer.charAt(offset++); - } - - return offset; - } - - /** - * Check whether the current path is identical to the one serialized (with - * {@link #serializeAppendTo(Appendable)}) in the given buffer, at the given - * offset. - */ - public boolean equalsToSerialized(CharSequence buffer, int offset) { - int n = (short) buffer.charAt(offset++); - if (ncomponents != n) { - return false; - } - if (ncomponents == 0) { - return true; - } - for (int i = 0; i < ncomponents; i++) { - if (ends[i] != (short) buffer.charAt(offset++)) { - return false; - } - } - int usedchars = ends[ncomponents - 1]; - for (int i = 0; i < usedchars; i++) { - if (chars[i] != buffer.charAt(offset++)) { - return false; - } - } - return true; - } - - /** - * Just like {@link #equalsToSerialized(CharSequence, int)}, but compare to - * a prefix of the CategoryPath, instead of the whole CategoryPath. - */ - public boolean equalsToSerialized(int prefixLen, CharSequence buffer, - int offset) { - if (prefixLen < 0 || prefixLen > ncomponents) { - prefixLen = ncomponents; - } - int n = (short) buffer.charAt(offset++); - if (prefixLen != n) { - return false; - } - if (prefixLen == 0) { - return true; - } - for (int i = 0; i < prefixLen; i++) { - if (ends[i] != (short) buffer.charAt(offset++)) { - return false; - } - } - int usedchars = ends[prefixLen - 1]; - for (int i = 0; i < usedchars; i++) { - if (chars[i] != buffer.charAt(offset++)) { - return false; - } - } - return true; - } - - /** - * This method calculates a hash function of a path that has been written to - * (using {@link #serializeAppendTo(Appendable)}) a character buffer. It is - * guaranteed that the value returned is identical to that which - * {@link #hashCode()} would have produced for the original object before it - * was serialized. - */ - public static int hashCodeOfSerialized(CharSequence buffer, int offset) { - // Note: the algorithm here must be identical to that of hashCode(), - // in order that they produce identical results! - int ncomponents = (short) buffer.charAt(offset++); - if (ncomponents == 0) { - return 0; - } - int hash = ncomponents; - for (int i = 0; i < ncomponents; i++) { - hash = hash * 31 + buffer.charAt(offset++); - } - int len = buffer.charAt(offset - 1); - for (int i = 0; i < len; i++) { - hash = hash * 31 + buffer.charAt(offset++); - } - return hash; - } - - /** - * Serializes the content of this CategoryPath to a byte stream, using UTF-8 - * encoding to convert characters to bytes, and treating the ends as 16-bit - * characters. + * Returns a string representation of the path, separating components with + * '/'. * - * @param osw - * The output byte stream. - * @throws IOException - * If there are encoding errors. + * @see #toString(char) */ - // TODO (Facet): consolidate all de/serialize method names to - // serialize() and unserialize() - public void serializeToStreamWriter(OutputStreamWriter osw) - throws IOException { - osw.write(this.ncomponents); - if (this.ncomponents <= 0) { - return; - } - for (int j = 0; j < this.ncomponents; j++) { - osw.write(this.ends[j]); - } - osw.write(this.chars, 0, this.ends[this.ncomponents - 1]); + @Override + public String toString() { + return toString('/'); } /** - * Serializes the content of this CategoryPath to a byte stream, using UTF-8 - * encoding to convert characters to bytes, and treating the ends as 16-bit - * characters. - * - * @param isr - * The input stream. - * @throws IOException - * If there are encoding errors. + * Returns a string representation of the path, separating components with the + * given delimiter. */ - public void deserializeFromStreamReader(InputStreamReader isr) - throws IOException { - this.ncomponents = (short) isr.read(); - if (this.ncomponents <= 0) { - return; + public String toString(char delimiter) { + if (length == 0) return ""; + + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < length; i++) { + sb.append(components[i]).append(delimiter); } - if (this.ends == null || this.ends.length < this.ncomponents) { - this.ends = new short[this.ncomponents]; - } - for (int j = 0; j < this.ncomponents; j++) { - this.ends[j] = (short) isr.read(); - } - if (this.chars == null - || this.ends[this.ncomponents - 1] > chars.length) { - this.chars = new char[this.ends[this.ncomponents - 1]]; - } - isr.read(this.chars, 0, this.ends[this.ncomponents - 1]); + sb.setLength(sb.length() - 1); // remove last delimiter + return sb.toString(); } - private void writeObject(java.io.ObjectOutputStream out) - throws IOException { - OutputStreamWriter osw = new OutputStreamWriter(out, "UTF-8"); - this.serializeToStreamWriter(osw); - osw.flush(); - } - - private void readObject(java.io.ObjectInputStream in) throws IOException { - InputStreamReader isr = new InputStreamReader(in, "UTF-8"); - this.deserializeFromStreamReader(isr); - } - - /** - * Compares this CategoryPath with the other CategoryPath for lexicographic - * order. - * Returns a negative integer, zero, or a positive integer as this - * CategoryPath lexicographically precedes, equals to, or lexicographically follows - * the other CategoryPath. - */ - @Override - public int compareTo(CategoryPath other) { - int minlength = (this.length() < other.length()) ? this.length() : other.length(); - int ch = 0; - for (int co = 0 ; co < minlength; co++) { - if (this.ends[co] <= other.ends[co]) { - for ( ; ch < this.ends[co] ; ch++) { - if (this.chars[ch] != other.chars[ch]) { - return this.chars[ch] - other.chars[ch]; - } - } - if (this.ends[co] < other.ends[co]) { - return -1; - } - } else /* this.ends[co] > other.ends[co] */ { - for ( ; ch < other.ends[co] ; ch++) { - if (this.chars[ch] != other.chars[ch]) { - return this.chars[ch] - other.chars[ch]; - } - } - return +1; - } - } - // one is a prefix of the other - return this.length() - other.length(); - } } Index: lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyReader.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyReader.java (revision 1429478) +++ lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyReader.java (working copy) @@ -205,22 +205,9 @@ */ public abstract int getParent(int ordinal) throws IOException; - /** - * Returns the path name of the category with the given ordinal. The path is - * returned as a new CategoryPath object - to reuse an existing object, use - * {@link #getPath(int, CategoryPath)}. - * - * @return a {@link CategoryPath} with the required path, or {@code null} if - * the given ordinal is unknown to the taxonomy. - */ + /** Returns the path name of the category with the given ordinal. */ public abstract CategoryPath getPath(int ordinal) throws IOException; - /** - * Same as {@link #getPath(int)}, only reuses the given {@link CategoryPath} - * instances. - */ - public abstract boolean getPath(int ordinal, CategoryPath result) throws IOException; - /** Returns the current refCount for this taxonomy reader. */ public final int getRefCount() { return refCount.get(); Index: lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java (revision 1429478) +++ lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java (working copy) @@ -59,8 +59,8 @@ private final DirectoryReader indexReader; // TODO: test DoubleBarrelLRUCache and consider using it instead - private LRUHashMap ordinalCache; - private LRUHashMap categoryCache; + private LRUHashMap ordinalCache; + private LRUHashMap categoryCache; private volatile ParallelTaxonomyArrays taxoArrays; @@ -72,15 +72,15 @@ * arrays. */ DirectoryTaxonomyReader(DirectoryReader indexReader, DirectoryTaxonomyWriter taxoWriter, - LRUHashMap ordinalCache, LRUHashMap categoryCache, + LRUHashMap ordinalCache, LRUHashMap categoryCache, ParallelTaxonomyArrays taxoArrays) throws IOException { this.indexReader = indexReader; this.taxoWriter = taxoWriter; this.taxoEpoch = taxoWriter == null ? -1 : taxoWriter.getTaxonomyEpoch(); // use the same instance of the cache, note the protective code in getOrdinal and getPath - this.ordinalCache = ordinalCache == null ? new LRUHashMap(DEFAULT_CACHE_VALUE) : ordinalCache; - this.categoryCache = categoryCache == null ? new LRUHashMap(DEFAULT_CACHE_VALUE) : categoryCache; + this.ordinalCache = ordinalCache == null ? new LRUHashMap(DEFAULT_CACHE_VALUE) : ordinalCache; + this.categoryCache = categoryCache == null ? new LRUHashMap(DEFAULT_CACHE_VALUE) : categoryCache; this.taxoArrays = taxoArrays != null ? new ParallelTaxonomyArrays(indexReader, taxoArrays) : null; } @@ -102,8 +102,8 @@ // These are the default cache sizes; they can be configured after // construction with the cache's setMaxSize() method - ordinalCache = new LRUHashMap(DEFAULT_CACHE_VALUE); - categoryCache = new LRUHashMap(DEFAULT_CACHE_VALUE); + ordinalCache = new LRUHashMap(DEFAULT_CACHE_VALUE); + categoryCache = new LRUHashMap(DEFAULT_CACHE_VALUE); } /** @@ -121,41 +121,10 @@ // These are the default cache sizes; they can be configured after // construction with the cache's setMaxSize() method - ordinalCache = new LRUHashMap(DEFAULT_CACHE_VALUE); - categoryCache = new LRUHashMap(DEFAULT_CACHE_VALUE); + ordinalCache = new LRUHashMap(DEFAULT_CACHE_VALUE); + categoryCache = new LRUHashMap(DEFAULT_CACHE_VALUE); } - private String getLabel(int catID) throws IOException { - ensureOpen(); - - // Since the cache is shared with DTR instances allocated from - // doOpenIfChanged, we need to ensure that the ordinal is one that this DTR - // instance recognizes. Therefore we do this check up front, before we hit - // the cache. - if (catID < 0 || catID >= indexReader.maxDoc()) { - return null; - } - - // TODO: can we use an int-based hash impl, such as IntToObjectMap, - // wrapped as LRU? - Integer catIDInteger = Integer.valueOf(catID); - synchronized (categoryCache) { - String res = categoryCache.get(catIDInteger); - if (res != null) { - return res; - } - } - - final LoadFullPathOnly loader = new LoadFullPathOnly(); - indexReader.document(catID, loader); - String ret = loader.getFullPath(); - synchronized (categoryCache) { - categoryCache.put(catIDInteger, ret); - } - - return ret; - } - private synchronized void initTaxoArrays() throws IOException { if (taxoArrays == null) { // according to Java Concurrency in Practice, this might perform better on @@ -278,16 +247,15 @@ } @Override - public int getOrdinal(CategoryPath categoryPath) throws IOException { + public int getOrdinal(CategoryPath cp) throws IOException { ensureOpen(); - if (categoryPath.length() == 0) { + if (cp.length == 0) { return ROOT_ORDINAL; } - String path = categoryPath.toString(delimiter); // First try to find the answer in the LRU cache: synchronized (ordinalCache) { - Integer res = ordinalCache.get(path); + Integer res = ordinalCache.get(cp); if (res != null) { if (res.intValue() < indexReader.maxDoc()) { // Since the cache is shared with DTR instances allocated from @@ -307,7 +275,7 @@ // If we're still here, we have a cache miss. We need to fetch the // value from disk, and then also put it in the cache: int ret = TaxonomyReader.INVALID_ORDINAL; - DocsEnum docs = MultiFields.getTermDocsEnum(indexReader, null, Consts.FULL, new BytesRef(path), 0); + DocsEnum docs = MultiFields.getTermDocsEnum(indexReader, null, Consts.FULL, new BytesRef(cp.toString(delimiter)), 0); if (docs != null && docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { ret = docs.docID(); @@ -317,7 +285,7 @@ // information about found categories, we cannot accidently tell a new // generation of DTR that a category does not exist. synchronized (ordinalCache) { - ordinalCache.put(path, Integer.valueOf(ret)); + ordinalCache.put(cp, Integer.valueOf(ret)); } } @@ -333,31 +301,33 @@ @Override public CategoryPath getPath(int ordinal) throws IOException { ensureOpen(); - // TODO (Facet): Currently, the LRU cache we use (getCategoryCache) holds - // strings with delimiters, not CategoryPath objects, so even if - // we have a cache hit, we need to process the string and build a new - // CategoryPath object every time. What is preventing us from putting - // the actual CategoryPath object in the cache is the fact that these - // objects are mutable. So we should create an immutable (read-only) - // interface that CategoryPath implements, and this method should - // return this interface, not the writable CategoryPath. - String label = getLabel(ordinal); - if (label == null) { + + // Since the cache is shared with DTR instances allocated from + // doOpenIfChanged, we need to ensure that the ordinal is one that this DTR + // instance recognizes. Therefore we do this check up front, before we hit + // the cache. + if (ordinal < 0 || ordinal >= indexReader.maxDoc()) { return null; } - return new CategoryPath(label, delimiter); - } - - @Override - public boolean getPath(int ordinal, CategoryPath result) throws IOException { - ensureOpen(); - String label = getLabel(ordinal); - if (label == null) { - return false; + + // TODO: can we use an int-based hash impl, such as IntToObjectMap, + // wrapped as LRU? + Integer catIDInteger = Integer.valueOf(ordinal); + synchronized (categoryCache) { + CategoryPath res = categoryCache.get(catIDInteger); + if (res != null) { + return res; + } } - result.clear(); - result.add(label, delimiter); - return true; + + final LoadFullPathOnly loader = new LoadFullPathOnly(); + indexReader.document(ordinal, loader); + CategoryPath ret = new CategoryPath(loader.getFullPath(), delimiter); + synchronized (categoryCache) { + categoryCache.put(catIDInteger, ret); + } + + return ret; } @Override @@ -411,7 +381,7 @@ sb.append(i + ": NULL!! \n"); continue; } - if (category.length() == 0) { + if (category.length == 0) { sb.append(i + ": EMPTY STRING!! \n"); continue; } Index: lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java (revision 1429478) +++ lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java (working copy) @@ -249,7 +249,7 @@ cacheIsComplete = true; // Make sure that the taxonomy always contain the root category // with category id 0. - addCategory(new CategoryPath()); + addCategory(CategoryPath.EMPTY); } else { // There are some categories on the disk, which we have not yet // read into the cache, and therefore the cache is incomplete. @@ -449,56 +449,6 @@ return doc; } - /** - * Look up the given prefix of the given category in the cache and/or the - * on-disk storage, returning that prefix's ordinal, or a negative number in - * case the category does not yet exist in the taxonomy. - */ - private int findCategory(CategoryPath categoryPath, int prefixLen) - throws IOException { - int res = cache.get(categoryPath, prefixLen); - if (res >= 0 || cacheIsComplete) { - return res; - } - - cacheMisses.incrementAndGet(); - perhapsFillCache(); - res = cache.get(categoryPath, prefixLen); - if (res >= 0 || cacheIsComplete) { - return res; - } - - initReaderManager(); - - int doc = -1; - DirectoryReader reader = readerManager.acquire(); - try { - TermsEnum termsEnum = null; // reuse - DocsEnum docs = null; // reuse - final BytesRef catTerm = new BytesRef(categoryPath.toString(delimiter, prefixLen)); - for (AtomicReaderContext ctx : reader.leaves()) { - Terms terms = ctx.reader().terms(Consts.FULL); - if (terms != null) { - termsEnum = terms.iterator(termsEnum); - if (termsEnum.seekExact(catTerm, true)) { - // liveDocs=null because the taxonomy has no deletes - docs = termsEnum.docs(null, docs, 0 /* freqs not required */); - // if the term was found, we know it has exactly one document. - doc = docs.nextDoc() + ctx.docBase; - break; - } - } - } - } finally { - readerManager.release(reader); - } - - if (doc > 0) { - addToCache(categoryPath, prefixLen, doc); - } - return doc; - } - @Override public int addCategory(CategoryPath categoryPath) throws IOException { ensureOpen(); @@ -516,7 +466,7 @@ // (while keeping the invariant that a parent is always added to // the taxonomy before its child). internalAddCategory() does all // this recursively - res = internalAddCategory(categoryPath, categoryPath.length()); + res = internalAddCategory(categoryPath); } } } @@ -532,25 +482,24 @@ * parent is always added to the taxonomy before its child). We do this by * recursion. */ - private int internalAddCategory(CategoryPath categoryPath, int length) - throws IOException { - + private int internalAddCategory(CategoryPath cp) throws IOException { // Find our parent's ordinal (recursively adding the parent category // to the taxonomy if it's not already there). Then add the parent // ordinal as payloads (rather than a stored field; payloads can be // more efficiently read into memory in bulk by LuceneTaxonomyReader) int parent; - if (length > 1) { - parent = findCategory(categoryPath, length - 1); + if (cp.length > 1) { + CategoryPath parentPath = cp.subpath(cp.length - 1); + parent = findCategory(parentPath); if (parent < 0) { - parent = internalAddCategory(categoryPath, length - 1); + parent = internalAddCategory(parentPath); } - } else if (length == 1) { + } else if (cp.length == 1) { parent = TaxonomyReader.ROOT_ORDINAL; } else { parent = TaxonomyReader.INVALID_ORDINAL; } - int id = addCategoryDocument(categoryPath, length, parent); + int id = addCategoryDocument(cp, parent); return id; } @@ -569,8 +518,7 @@ * Note that the methods calling addCategoryDocument() are synchornized, so * this method is effectively synchronized as well. */ - private int addCategoryDocument(CategoryPath categoryPath, int length, - int parent) throws IOException { + private int addCategoryDocument(CategoryPath categoryPath, int parent) throws IOException { // Before Lucene 2.9, position increments >=0 were supported, so we // added 1 to parent to allow the parent -1 (the parent of the root). // Unfortunately, starting with Lucene 2.9, after LUCENE-1542, this is @@ -580,11 +528,11 @@ // we write here (e.g., to write parent+2), and need to do a workaround // in the reader (which knows that anyway only category 0 has a parent // -1). - parentStream.set(Math.max(parent+1, 1)); + parentStream.set(Math.max(parent + 1, 1)); Document d = new Document(); d.add(parentStreamField); - fullPathField.setStringValue(categoryPath.toString(delimiter, length)); + fullPathField.setStringValue(categoryPath.toString(delimiter)); d.add(fullPathField); // Note that we do no pass an Analyzer here because the fields that are @@ -601,7 +549,7 @@ // NOTE: this line must be executed last, or else the cache gets updated // before the parents array (LUCENE-4596) - addToCache(categoryPath, length, id); + addToCache(categoryPath, id); return id; } @@ -653,14 +601,6 @@ } } - private void addToCache(CategoryPath categoryPath, int prefixLen, int id) - throws IOException { - if (cache.put(categoryPath, prefixLen, id)) { - refreshReaderManager(); - cacheIsComplete = false; - } - } - private synchronized void refreshReaderManager() throws IOException { // this method is synchronized since it cannot happen concurrently with // addCategoryDocument -- when this method returns, we must know that the @@ -760,7 +700,6 @@ boolean aborted = false; DirectoryReader reader = readerManager.acquire(); try { - CategoryPath cp = new CategoryPath(); TermsEnum termsEnum = null; DocsEnum docsEnum = null; for (AtomicReaderContext ctx : reader.leaves()) { @@ -775,8 +714,7 @@ // hence documents), there are no deletions in the index. Therefore, it // is sufficient to call next(), and then doc(), exactly once with no // 'validation' checks. - cp.clear(); - cp.add(t.utf8ToString(), delimiter); + CategoryPath cp = new CategoryPath(t.utf8ToString(), delimiter); docsEnum = termsEnum.docs(null, docsEnum, DocsEnum.FLAG_NONE); boolean res = cache.put(cp, docsEnum.nextDoc() + ctx.docBase); assert !res : "entries should not have been evicted from the cache"; @@ -857,7 +795,6 @@ final int size = r.numDocs(); final OrdinalMap ordinalMap = map; ordinalMap.setSize(size); - CategoryPath cp = new CategoryPath(); int base = 0; TermsEnum te = null; DocsEnum docs = null; @@ -867,8 +804,7 @@ te = terms.iterator(te); while (te.next() != null) { String value = te.term().utf8ToString(); - cp.clear(); - cp.add(value, Consts.DEFAULT_DELIMITER); + CategoryPath cp = new CategoryPath(value, Consts.DEFAULT_DELIMITER); final int ordinal = addCategory(cp); docs = te.docs(null, docs, DocsEnum.FLAG_NONE); ordinalMap.addMapping(docs.nextDoc() + base, ordinal); Index: lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/TaxonomyWriterCache.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/TaxonomyWriterCache.java (revision 1429478) +++ lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/TaxonomyWriterCache.java (working copy) @@ -65,15 +65,6 @@ public int get(CategoryPath categoryPath); /** - * Like {@link #get(CategoryPath)}, but for a given prefix of the - * category path. - *

- * If the given length is negative or bigger than the path's actual - * length, the full path is taken. - */ - public int get(CategoryPath categoryPath, int length); - - /** * Add a category to the cache, with the given ordinal as the value. *

* If the implementation keeps only a partial cache (e.g., an LRU cache) @@ -94,15 +85,6 @@ public boolean put(CategoryPath categoryPath, int ordinal); /** - * Like {@link #put(CategoryPath, int)}, but for a given prefix of the - * category path. - *

- * If the given length is negative or bigger than the path's actual - * length, the full path is taken. - */ - public boolean put(CategoryPath categoryPath, int prefixLen, int ordinal); - - /** * Returns true if the cache is full, such that the next {@link #put} will * evict entries from it, false otherwise. */ Index: lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/cl2o/CategoryPathUtils.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/cl2o/CategoryPathUtils.java (revision 0) +++ lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/cl2o/CategoryPathUtils.java (working copy) @@ -0,0 +1,82 @@ +package org.apache.lucene.facet.taxonomy.writercache.cl2o; + +import org.apache.lucene.facet.taxonomy.CategoryPath; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** Utilities for use of {@link CategoryPath} by {@link CompactLabelToOrdinal}. */ +public class CategoryPathUtils { + + /** Serializes the given {@link CategoryPath} to the {@link CharBlockArray}. */ + public static void serialize(CategoryPath cp, CharBlockArray charBlockArray) { + charBlockArray.append((char) cp.length); + if (cp.length == 0) { + return; + } + for (int i = 0; i < cp.length; i++) { + charBlockArray.append((char) cp.components[i].length()); + charBlockArray.append(cp.components[i]); + } + } + + /** + * Calculates a hash function of a path that serialized with + * {@link #serialize(CategoryPath, CharBlockArray)}. + */ + public static int hashCodeOfSerialized(CharBlockArray charBlockArray, int offset) { + int length = (short) charBlockArray.charAt(offset++); + if (length == 0) { + return 0; + } + + int hash = length; + for (int i = 0; i < length; i++) { + int len = (short) charBlockArray.charAt(offset++); + hash = hash * 31 + charBlockArray.subSequence(offset, offset + len).hashCode(); + offset += len; + } + return hash; + } + + /** + * Check whether the {@link CategoryPath} is equal to the one serialized in + * {@link CharBlockArray}. + */ + public static boolean equalsToSerialized(CategoryPath cp, CharBlockArray charBlockArray, int offset) { + int n = charBlockArray.charAt(offset++); + if (cp.length != n) { + return false; + } + if (cp.length == 0) { + return true; + } + + for (int i = 0; i < cp.length; i++) { + int len = (short) charBlockArray.charAt(offset++); + if (len != cp.components[i].length()) { + return false; + } + if (!cp.components[i].equals(charBlockArray.subSequence(offset, offset + len))) { + return false; + } + offset += len; + } + return true; + } + +} Property changes on: lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/cl2o/CategoryPathUtils.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/cl2o/CharBlockArray.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/cl2o/CharBlockArray.java (revision 1429478) +++ lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/cl2o/CharBlockArray.java (working copy) @@ -41,7 +41,7 @@ final static class Block implements Serializable, Cloneable { private static final long serialVersionUID = 1L; - char[] chars; + final char[] chars; int length; Block(int size) { @@ -149,7 +149,7 @@ @Override public char charAt(int index) { - Block b = this.blocks.get(blockIndex(index)); + Block b = blocks.get(blockIndex(index)); return b.chars[indexInBlock(index)]; } @@ -160,16 +160,27 @@ @Override public CharSequence subSequence(int start, int end) { - throw new UnsupportedOperationException("subsequence not implemented yet"); + int remaining = end - start; + StringBuilder sb = new StringBuilder(remaining); + int blockIdx = blockIndex(start); + int indexInBlock = indexInBlock(start); + while (remaining > 0) { + Block b = blocks.get(blockIdx++); + int numToAppend = Math.min(remaining, b.length - indexInBlock); + sb.append(b.chars, indexInBlock, numToAppend); + remaining -= numToAppend; + indexInBlock = 0; // 2nd+ iterations read from start of the block + } + return sb.toString(); } @Override public String toString() { - StringBuilder b = new StringBuilder(blockSize * this.blocks.size()); - for (int i = 0; i < this.blocks.size(); i++) { - b.append(this.blocks.get(i).chars); + StringBuilder sb = new StringBuilder(); + for (Block b : blocks) { + sb.append(b.chars, 0, b.length); } - return b.toString(); + return sb.toString(); } void flush(OutputStream out) throws IOException { Index: lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/cl2o/Cl2oTaxonomyWriterCache.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/cl2o/Cl2oTaxonomyWriterCache.java (revision 1429478) +++ lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/cl2o/Cl2oTaxonomyWriterCache.java (working copy) @@ -78,19 +78,6 @@ } @Override - public int get(CategoryPath categoryPath, int length) { - if (length < 0 || length > categoryPath.length()) { - length = categoryPath.length(); - } - lock.readLock().lock(); - try { - return cache.getOrdinal(categoryPath, length); - } finally { - lock.readLock().unlock(); - } - } - - @Override public boolean put(CategoryPath categoryPath, int ordinal) { lock.writeLock().lock(); try { @@ -103,23 +90,7 @@ } } - @Override - public boolean put(CategoryPath categoryPath, int prefixLen, int ordinal) { - lock.writeLock().lock(); - try { - cache.addLabel(categoryPath, prefixLen, ordinal); - // Tell the caller we didn't clear part of the cache, so it doesn't - // have to flush its on-disk index now - return false; - } finally { - lock.writeLock().unlock(); - } - } - - /** - * Returns the number of bytes in memory used by this object. - * @return Number of bytes in memory used by this object. - */ + /** Returns the number of bytes in memory used by this object. */ public int getMemoryUsage() { return cache == null ? 0 : cache.getMemoryUsage(); } Index: lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/cl2o/CollisionMap.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/cl2o/CollisionMap.java (revision 1429478) +++ lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/cl2o/CollisionMap.java (working copy) @@ -1,6 +1,5 @@ package org.apache.lucene.facet.taxonomy.writercache.cl2o; -import java.io.IOException; import java.util.Iterator; import java.util.NoSuchElementException; @@ -108,74 +107,35 @@ int bucketIndex = indexFor(hash, this.capacity); Entry e = this.entries[bucketIndex]; - while (e != null && !(hash == e.hash && label.equalsToSerialized(this.labelRepository, e.offset))) { + while (e != null && !(hash == e.hash && CategoryPathUtils.equalsToSerialized(label, labelRepository, e.offset))) { e = e.next; } if (e == null) { - return LabelToOrdinal.InvalidOrdinal; + return LabelToOrdinal.INVALID_ORDINAL; } return e.cid; } - public int get(CategoryPath label, int prefixLen, int hash) { - int bucketIndex = indexFor(hash, this.capacity); - Entry e = this.entries[bucketIndex]; - - while (e != null && !(hash == e.hash && label.equalsToSerialized(prefixLen, this.labelRepository, e.offset))) { - e = e.next; - } - if (e == null) { - return LabelToOrdinal.InvalidOrdinal; - } - - return e.cid; - } - public int addLabel(CategoryPath label, int hash, int cid) { int bucketIndex = indexFor(hash, this.capacity); for (Entry e = this.entries[bucketIndex]; e != null; e = e.next) { - if (e.hash == hash && label.equalsToSerialized(this.labelRepository, e.offset)) { + if (e.hash == hash && CategoryPathUtils.equalsToSerialized(label, labelRepository, e.offset)) { return e.cid; } } // new string; add to label repository - int offset = this.labelRepository.length(); - try { - label.serializeAppendTo(labelRepository); - } catch (IOException e) { - // can't happen, because labelRepository.append() doesn't throw an exception - } - + int offset = labelRepository.length(); + CategoryPathUtils.serialize(label, labelRepository); addEntry(offset, cid, hash, bucketIndex); return cid; } - public int addLabel(CategoryPath label, int prefixLen, int hash, int cid) { - int bucketIndex = indexFor(hash, this.capacity); - for (Entry e = this.entries[bucketIndex]; e != null; e = e.next) { - if (e.hash == hash && label.equalsToSerialized(prefixLen, this.labelRepository, e.offset)) { - return e.cid; - } - } - - // new string; add to label repository - int offset = this.labelRepository.length(); - try { - label.serializeAppendTo(prefixLen, labelRepository); - } catch (IOException e) { - // can't happen, because labelRepository.append() doesn't throw an exception - } - - addEntry(offset, cid, hash, bucketIndex); - return cid; - } - /** - * This method does not check if the same value is already - * in the map because we pass in an char-array offset, so - * so we now that we're in resize-mode here. + * This method does not check if the same value is already in the map because + * we pass in an char-array offset, so so we now that we're in resize-mode + * here. */ public void addLabelOffset(int hash, int offset, int cid) { int bucketIndex = indexFor(hash, this.capacity); Index: lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/cl2o/CompactLabelToOrdinal.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/cl2o/CompactLabelToOrdinal.java (revision 1429478) +++ lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/cl2o/CompactLabelToOrdinal.java (working copy) @@ -29,8 +29,6 @@ import org.apache.lucene.facet.taxonomy.CategoryPath; -// TODO: maybe this could use an FST instead... - /** * This is a very efficient LabelToOrdinal implementation that uses a * CharBlockArray to store all labels and a configurable number of HashArrays to @@ -59,8 +57,8 @@ public static final float DefaultLoadFactor = 0.15f; - static final char TerminatorChar = 0xffff; - private static final int Collision = -5; + static final char TERMINATOR_CHAR = 0xffff; + private static final int COLLISION = -5; private HashArray[] hashArrays; private CollisionMap collisionMap; @@ -103,9 +101,7 @@ private void init() { labelRepository = new CharBlockArray(); - try { - new CategoryPath().serializeAppendTo(labelRepository); - } catch (IOException e) { } //can't happen + CategoryPathUtils.serialize(CategoryPath.EMPTY, labelRepository); int c = this.capacity; for (int i = 0; i < this.hashArrays.length; i++) { @@ -116,7 +112,7 @@ @Override public void addLabel(CategoryPath label, int ordinal) { - if (this.collisionMap.size() > this.threshold) { + if (collisionMap.size() > threshold) { grow(); } @@ -127,43 +123,22 @@ } } - int prevVal = this.collisionMap.addLabel(label, hash, ordinal); + int prevVal = collisionMap.addLabel(label, hash, ordinal); if (prevVal != ordinal) { - throw new IllegalArgumentException("Label already exists: " + - label.toString('/') + " prev ordinal " + prevVal); + throw new IllegalArgumentException("Label already exists: " + label.toString('/') + " prev ordinal " + prevVal); } } @Override - public void addLabel(CategoryPath label, int prefixLen, int ordinal) { - if (this.collisionMap.size() > this.threshold) { - grow(); - } - - int hash = CompactLabelToOrdinal.stringHashCode(label, prefixLen); - for (int i = 0; i < this.hashArrays.length; i++) { - if (addLabel(this.hashArrays[i], label, prefixLen, hash, ordinal)) { - return; - } - } - - int prevVal = this.collisionMap.addLabel(label, prefixLen, hash, ordinal); - if (prevVal != ordinal) { - throw new IllegalArgumentException("Label already exists: " + - label.toString('/', prefixLen) + " prev ordinal " + prevVal); - } - } - - @Override public int getOrdinal(CategoryPath label) { if (label == null) { - return LabelToOrdinal.InvalidOrdinal; + return LabelToOrdinal.INVALID_ORDINAL; } int hash = CompactLabelToOrdinal.stringHashCode(label); for (int i = 0; i < this.hashArrays.length; i++) { int ord = getOrdinal(this.hashArrays[i], label, hash); - if (ord != Collision) { + if (ord != COLLISION) { return ord; } } @@ -171,23 +146,6 @@ return this.collisionMap.get(label, hash); } - @Override - public int getOrdinal(CategoryPath label, int prefixLen) { - if (label == null) { - return LabelToOrdinal.InvalidOrdinal; - } - - int hash = CompactLabelToOrdinal.stringHashCode(label, prefixLen); - for (int i = 0; i < this.hashArrays.length; i++) { - int ord = getOrdinal(this.hashArrays[i], label, prefixLen, hash); - if (ord != Collision) { - return ord; - } - } - - return this.collisionMap.get(label, prefixLen, hash); - } - private void grow() { HashArray temp = this.hashArrays[this.hashArrays.length - 1]; @@ -241,19 +199,13 @@ } } - private boolean addLabel(HashArray a, CategoryPath label, int hash, - int ordinal) { + private boolean addLabel(HashArray a, CategoryPath label, int hash, int ordinal) { int index = CompactLabelToOrdinal.indexFor(hash, a.offsets.length); int offset = a.offsets[index]; if (offset == 0) { a.offsets[index] = this.labelRepository.length(); - try { - label.serializeAppendTo(this.labelRepository); - } catch (IOException e) { - // can't happen - LabelRepository.append() never throws an - // exception - } + CategoryPathUtils.serialize(label, labelRepository); a.cids[index] = ordinal; return true; } @@ -261,26 +213,6 @@ return false; } - private boolean addLabel(HashArray a, CategoryPath label, int prefixLen, - int hash, int ordinal) { - int index = CompactLabelToOrdinal.indexFor(hash, a.offsets.length); - int offset = a.offsets[index]; - - if (offset == 0) { - a.offsets[index] = this.labelRepository.length(); - try { - label.serializeAppendTo(prefixLen, this.labelRepository); - } catch (IOException e) { - // can't happen - LabelRepository.append() never throws an - // exception - } - a.cids[index] = ordinal; - return true; - } - - return false; - } - private void addLabelOffset(int hash, int cid, int knownOffset) { for (int i = 0; i < this.hashArrays.length; i++) { if (addLabelOffsetToHashArray(this.hashArrays[i], hash, cid, @@ -313,43 +245,23 @@ private int getOrdinal(HashArray a, CategoryPath label, int hash) { if (label == null) { - return LabelToOrdinal.InvalidOrdinal; + return LabelToOrdinal.INVALID_ORDINAL; } - int index = CompactLabelToOrdinal.indexFor(hash, a.offsets.length); + int index = indexFor(hash, a.offsets.length); int offset = a.offsets[index]; if (offset == 0) { - return LabelToOrdinal.InvalidOrdinal; + return LabelToOrdinal.INVALID_ORDINAL; } - if (label.equalsToSerialized(labelRepository, offset)) { + if (CategoryPathUtils.equalsToSerialized(label, labelRepository, offset)) { return a.cids[index]; } - return Collision; + return COLLISION; } - private int getOrdinal(HashArray a, CategoryPath label, int prefixLen, int hash) { - if (label == null) { - return LabelToOrdinal.InvalidOrdinal; - } - - int index = CompactLabelToOrdinal.indexFor(hash, a.offsets.length); - int offset = a.offsets[index]; - if (offset == 0) { - return LabelToOrdinal.InvalidOrdinal; - } - - if (label.equalsToSerialized(prefixLen, labelRepository, offset)) { - return a.cids[index]; - } - - return Collision; - } - - /** - * Returns index for hash code h. - */ + /** Returns index for hash code h. */ static int indexFor(int h, int length) { return h & (length - 1); } @@ -378,22 +290,10 @@ } - static int stringHashCode(CategoryPath label, int prefixLen) { - int hash = label.hashCode(prefixLen); - - hash = hash ^ ((hash >>> 20) ^ (hash >>> 12)); - hash = hash ^ (hash >>> 7) ^ (hash >>> 4); - - return hash; - - } - static int stringHashCode(CharBlockArray labelRepository, int offset) { - int hash = CategoryPath.hashCodeOfSerialized(labelRepository, offset); - + int hash = CategoryPathUtils.hashCodeOfSerialized(labelRepository, offset); hash = hash ^ ((hash >>> 20) ^ (hash >>> 12)); hash = hash ^ (hash >>> 7) ^ (hash >>> 4); - return hash; } @@ -495,26 +395,17 @@ // that array offsets will work). Since the initial file is machine // generated, I think this should be OK. while (offset < l2o.labelRepository.length()) { - // First component is numcomponents, so we initialize the hash - // to this - int ncomponents = l2o.labelRepository.charAt(offset++); - int hash = ncomponents; - // If ncomponents is 0, then we are done? - if (ncomponents != 0) { - - // usedchars is always the last member of the 'ends' array - // in serialization. Rather than rebuild the entire array, - // assign usedchars to the last value we read in. This will - // be slightly more memory efficient. - int usedchars = 0; - for (int i = 0; i < ncomponents; i++) { - usedchars = l2o.labelRepository.charAt(offset++); - hash = hash * 31 + usedchars; + // identical code to CategoryPath.hashFromSerialized. since we need to + // advance offset, we cannot call the method directly. perhaps if we + // could pass a mutable Integer or something... + int length = (short) l2o.labelRepository.charAt(offset++); + int hash = length; + if (length != 0) { + for (int i = 0; i < length; i++) { + int len = (short) l2o.labelRepository.charAt(offset++); + hash = hash * 31 + l2o.labelRepository.subSequence(offset, offset + len).hashCode(); + offset += len; } - // Hash the usedchars for this label - for (int i = 0; i < usedchars; i++) { - hash = hash * 31 + l2o.labelRepository.charAt(offset++); - } } // Now that we've hashed the components of the label, do the // final part of the hash algorithm. Index: lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/cl2o/LabelToOrdinal.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/cl2o/LabelToOrdinal.java (revision 1429478) +++ lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/cl2o/LabelToOrdinal.java (working copy) @@ -27,7 +27,7 @@ public abstract class LabelToOrdinal { protected int counter; - public static final int InvalidOrdinal = -2; + public static final int INVALID_ORDINAL = -2; /** * return the maximal Ordinal assigned so far @@ -52,22 +52,9 @@ public abstract void addLabel(CategoryPath label, int ordinal); /** - * Adds a new label if its not yet in the table. - * Throws an {@link IllegalArgumentException} if the same label with - * a different ordinal was previoulsy added to this table. - */ - public abstract void addLabel(CategoryPath label, int prefixLen, int ordinal); - - /** * @return the ordinal assigned to the given label, - * or {@link #InvalidOrdinal} if the label cannot be found in this table. + * or {@link #INVALID_ORDINAL} if the label cannot be found in this table. */ public abstract int getOrdinal(CategoryPath label); - /** - * @return the ordinal assigned to the given label, - * or {@link #InvalidOrdinal} if the label cannot be found in this table. - */ - public abstract int getOrdinal(CategoryPath label, int prefixLen); - } Index: lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/lru/LruTaxonomyWriterCache.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/lru/LruTaxonomyWriterCache.java (revision 1429478) +++ lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/lru/LruTaxonomyWriterCache.java (working copy) @@ -87,23 +87,6 @@ } @Override - public synchronized int get(CategoryPath categoryPath, int length) { - if (length<0 || length>categoryPath.length()) { - length = categoryPath.length(); - } - // TODO (Facet): unfortunately, we make a copy here! we can avoid part of - // the copy by creating a wrapper object (but this still creates a new - // object). A better implementation of the cache would not use Java's - // hash table, but rather some other hash table we can control, and - // pass the length parameter into it... - Integer res = cache.get(new CategoryPath(categoryPath, length)); - if (res==null) { - return -1; - } - return res.intValue(); - } - - @Override public synchronized boolean put(CategoryPath categoryPath, int ordinal) { boolean ret = cache.put(categoryPath, new Integer(ordinal)); // If the cache is full, we need to clear one or more old entries @@ -119,20 +102,4 @@ return ret; } - @Override - public synchronized boolean put(CategoryPath categoryPath, int prefixLen, int ordinal) { - boolean ret = cache.put(categoryPath, prefixLen, new Integer(ordinal)); - // If the cache is full, we need to clear one or more old entries - // from the cache. However, if we delete from the cache a recent - // addition that isn't yet in our reader, for this entry to be - // visible to us we need to make sure that the changes have been - // committed and we reopen the reader. Because this is a slow - // operation, we don't delete entries one-by-one but rather in bulk - // (put() removes the 2/3rd oldest entries). - if (ret) { - cache.makeRoomLRU(); - } - return ret; - } - } Index: lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/lru/NameHashIntCacheLRU.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/lru/NameHashIntCacheLRU.java (revision 1429478) +++ lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/lru/NameHashIntCacheLRU.java (working copy) @@ -41,6 +41,7 @@ @Override Object key(CategoryPath name, int prefixLen) { - return new Long(name.longHashCode(prefixLen)); + return new Long(name.subpath(prefixLen).longHashCode()); } + } Index: lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/lru/NameIntCacheLRU.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/lru/NameIntCacheLRU.java (revision 1429478) +++ lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/lru/NameIntCacheLRU.java (working copy) @@ -68,23 +68,13 @@ return res; } - /** - * Subclasses can override this to provide caching by e.g. hash of the string. - */ + /** Subclasses can override this to provide caching by e.g. hash of the string. */ Object key(CategoryPath name) { - // Note that a copy constructor (cloning) here is necessary, because a - // CategoryPath object is mutable, so we cannot save a reference to an - // existing CategoryPath. Subclasses which override this method can - // avoid this cloning by, e.g., hashing the name. - return new CategoryPath(name); + return name; } Object key(CategoryPath name, int prefixLen) { - // Note that a copy constructor (cloning) here is necessary, because a - // CategoryPath object is mutable, so we cannot save a reference to an - // existing CategoryPath. Subclasses which override this method can - // avoid this cloning by, e.g., hashing the name. - return new CategoryPath(name, prefixLen); + return name.subpath(prefixLen); } /** Index: lucene/facet/src/test/org/apache/lucene/facet/index/OrdinalMappingReaderTest.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/index/OrdinalMappingReaderTest.java (revision 1429478) +++ lucene/facet/src/test/org/apache/lucene/facet/index/OrdinalMappingReaderTest.java (working copy) @@ -77,7 +77,7 @@ FacetResultNode node = result.getFacetResultNode(); for (FacetResultNode facet: node.getSubResults()) { int weight = (int)facet.getValue(); - int label = Integer.parseInt(facet.getLabel().getComponent(1)); + int label = Integer.parseInt(facet.getLabel().components[1]); //System.out.println(label + ": " + weight); if (VERBOSE) { System.out.println(label + ": " + weight); Index: lucene/facet/src/test/org/apache/lucene/facet/index/categorypolicy/OrdinalPolicyTest.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/index/categorypolicy/OrdinalPolicyTest.java (revision 1429478) +++ lucene/facet/src/test/org/apache/lucene/facet/index/categorypolicy/OrdinalPolicyTest.java (working copy) @@ -31,12 +31,10 @@ public void testDefaultOrdinalPolicy() { // check ordinal policy OrdinalPolicy ordinalPolicy = OrdinalPolicy.ALL_PARENTS; - assertFalse("default ordinal policy should not match root", ordinalPolicy - .shouldAdd(TaxonomyReader.ROOT_ORDINAL)); + assertFalse("default ordinal policy should not match root", ordinalPolicy.shouldAdd(TaxonomyReader.ROOT_ORDINAL)); for (int i = 0; i < 300; i++) { int ordinal = 1 + random().nextInt(Integer.MAX_VALUE - 1); - assertTrue("default ordinal policy should match " + ordinal, - ordinalPolicy.shouldAdd(ordinal)); + assertTrue("default ordinal policy should match " + ordinal, ordinalPolicy.shouldAdd(ordinal)); } } @@ -50,8 +48,7 @@ String[] topLevelStrings = new String[10]; for (int i = 0; i < 10; i++) { topLevelStrings[i] = Integer.valueOf(random().nextInt(30)).toString(); - topLevelOrdinals[i] = taxonomy.addCategory(new CategoryPath( - topLevelStrings[i])); + topLevelOrdinals[i] = taxonomy.addCategory(new CategoryPath(topLevelStrings[i])); } int[] nonTopLevelOrdinals = new int[300]; for (int i = 0; i < 300; i++) { @@ -61,22 +58,18 @@ for (int j = 1; j < components.length; j++) { components[j] = (Integer.valueOf(random().nextInt(30))).toString(); } - nonTopLevelOrdinals[i] = taxonomy.addCategory(new CategoryPath( - components)); + nonTopLevelOrdinals[i] = taxonomy.addCategory(new CategoryPath(components)); } // check ordinal policy OrdinalPolicy ordinalPolicy = new NonTopLevelOrdinalPolicy(); ordinalPolicy.init(taxonomy); - assertFalse("top level ordinal policy should not match root", ordinalPolicy - .shouldAdd(TaxonomyReader.ROOT_ORDINAL)); + assertFalse("top level ordinal policy should not match root", ordinalPolicy.shouldAdd(TaxonomyReader.ROOT_ORDINAL)); for (int i = 0; i < 10; i++) { - assertFalse("top level ordinal policy should not match " - + topLevelOrdinals[i], + assertFalse("top level ordinal policy should not match " + topLevelOrdinals[i], ordinalPolicy.shouldAdd(topLevelOrdinals[i])); } for (int i = 0; i < 300; i++) { - assertTrue("top level ordinal policy should match " - + nonTopLevelOrdinals[i], + assertTrue("top level ordinal policy should match " + nonTopLevelOrdinals[i], ordinalPolicy.shouldAdd(nonTopLevelOrdinals[i])); } Index: lucene/facet/src/test/org/apache/lucene/facet/index/categorypolicy/PathPolicyTest.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/index/categorypolicy/PathPolicyTest.java (revision 1429478) +++ lucene/facet/src/test/org/apache/lucene/facet/index/categorypolicy/PathPolicyTest.java (working copy) @@ -29,10 +29,9 @@ @Test public void testDefaultPathPolicy() { // check path policy - CategoryPath cp = new CategoryPath(); + CategoryPath cp = CategoryPath.EMPTY; PathPolicy pathPolicy = PathPolicy.ALL_CATEGORIES; - assertFalse("default path policy should not accept root", - pathPolicy.shouldAdd(cp)); + assertFalse("default path policy should not accept root", pathPolicy.shouldAdd(cp)); for (int i = 0; i < 300; i++) { int nComponents = 1 + random().nextInt(10); String[] components = new String[nComponents]; @@ -40,9 +39,7 @@ components[j] = (Integer.valueOf(random().nextInt(30))).toString(); } cp = new CategoryPath(components); - assertTrue("default path policy should accept " - + cp.toString('/'), - pathPolicy.shouldAdd(cp)); + assertTrue("default path policy should accept " + cp.toString('/'), pathPolicy.shouldAdd(cp)); } } @@ -74,7 +71,7 @@ // check ordinal policy PathPolicy pathPolicy = new NonTopLevelPathPolicy(); assertFalse("top level path policy should not match root", - pathPolicy.shouldAdd(new CategoryPath())); + pathPolicy.shouldAdd(CategoryPath.EMPTY)); for (int i = 0; i < 10; i++) { assertFalse("top level path policy should not match " + topLevelPaths[i], Index: lucene/facet/src/test/org/apache/lucene/facet/index/params/FacetIndexingParamsTest.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/index/params/FacetIndexingParamsTest.java (revision 1429478) +++ lucene/facet/src/test/org/apache/lucene/facet/index/params/FacetIndexingParamsTest.java (working copy) @@ -74,7 +74,7 @@ public void testCategoryPolicies() { FacetIndexingParams dfip = FacetIndexingParams.ALL_PARENTS; // check path policy - CategoryPath cp = new CategoryPath(); + CategoryPath cp = CategoryPath.EMPTY; PathPolicy pathPolicy = PathPolicy.ALL_CATEGORIES; assertEquals("path policy does not match default for root", pathPolicy.shouldAdd(cp), dfip.getPathPolicy().shouldAdd(cp)); for (int i = 0; i < 30; i++) { Index: lucene/facet/src/test/org/apache/lucene/facet/search/TestDemoFacets.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/search/TestDemoFacets.java (revision 1429478) +++ lucene/facet/src/test/org/apache/lucene/facet/search/TestDemoFacets.java (working copy) @@ -129,7 +129,7 @@ } private void toSimpleString(int depth, StringBuilder sb, FacetResultNode node, String indent) { - sb.append(indent + node.getLabel().getComponent(depth) + " (" + (int) node.getValue() + ")\n"); + sb.append(indent + node.getLabel().components[depth] + " (" + (int) node.getValue() + ")\n"); for(FacetResultNode childNode : node.getSubResults()) { toSimpleString(depth+1, sb, childNode, indent + " "); } Index: lucene/facet/src/test/org/apache/lucene/facet/search/TestTopKInEachNodeResultHandler.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/search/TestTopKInEachNodeResultHandler.java (revision 1429478) +++ lucene/facet/src/test/org/apache/lucene/facet/search/TestTopKInEachNodeResultHandler.java (working copy) @@ -178,7 +178,7 @@ } FacetResult fr = facetResults.get(0); // a, depth=3, K=2 - boolean hasDoctor = "Doctor".equals(fr.getFacetRequest().getCategoryPath().getComponent(0)); + boolean hasDoctor = "Doctor".equals(fr.getFacetRequest().getCategoryPath().components[0]); assertEquals(9, fr.getNumValidDescendants()); FacetResultNode parentRes = fr.getFacetResultNode(); assertEquals(16.0, parentRes.getValue(), Double.MIN_VALUE); @@ -219,7 +219,7 @@ } fr = facetResults.get(1); // a, depth=2, K=2. same result as before - hasDoctor |= "Doctor".equals(fr.getFacetRequest().getCategoryPath().getComponent(0)); + hasDoctor |= "Doctor".equals(fr.getFacetRequest().getCategoryPath().components[0]); assertEquals(9, fr.getNumValidDescendants()); parentRes = fr.getFacetResultNode(); assertEquals(16.0, parentRes.getValue(), Double.MIN_VALUE); @@ -239,7 +239,7 @@ } fr = facetResults.get(2); // a, depth=1, K=2 - hasDoctor |= "Doctor".equals(fr.getFacetRequest().getCategoryPath().getComponent(0)); + hasDoctor |= "Doctor".equals(fr.getFacetRequest().getCategoryPath().components[0]); assertEquals(4, fr.getNumValidDescendants(), 4); parentRes = fr.getFacetResultNode(); assertEquals(16.0, parentRes.getValue(), Double.MIN_VALUE); @@ -257,7 +257,7 @@ } fr = facetResults.get(3); // a/b, depth=3, K=2 - hasDoctor |= "Doctor".equals(fr.getFacetRequest().getCategoryPath().getComponent(0)); + hasDoctor |= "Doctor".equals(fr.getFacetRequest().getCategoryPath().components[0]); assertEquals(4, fr.getNumValidDescendants()); parentRes = fr.getFacetResultNode(); assertEquals(8.0, parentRes.getValue(), Double.MIN_VALUE); @@ -272,7 +272,7 @@ } fr = facetResults.get(4); // a/b, depth=2, K=2 - hasDoctor |= "Doctor".equals(fr.getFacetRequest().getCategoryPath().getComponent(0)); + hasDoctor |= "Doctor".equals(fr.getFacetRequest().getCategoryPath().components[0]); assertEquals(4, fr.getNumValidDescendants()); parentRes = fr.getFacetResultNode(); assertEquals(8.0, parentRes.getValue(), Double.MIN_VALUE); @@ -286,7 +286,7 @@ } fr = facetResults.get(5); // a/b, depth=1, K=2 - hasDoctor |= "Doctor".equals(fr.getFacetRequest().getCategoryPath().getComponent(0)); + hasDoctor |= "Doctor".equals(fr.getFacetRequest().getCategoryPath().components[0]); assertEquals(4, fr.getNumValidDescendants()); parentRes = fr.getFacetResultNode(); assertEquals(8.0, parentRes.getValue(), Double.MIN_VALUE); @@ -300,13 +300,13 @@ } fr = facetResults.get(6); // a/b, depth=0, K=2 - hasDoctor |= "Doctor".equals(fr.getFacetRequest().getCategoryPath().getComponent(0)); + hasDoctor |= "Doctor".equals(fr.getFacetRequest().getCategoryPath().components[0]); assertEquals(0, fr.getNumValidDescendants()); // 0 descendants but rootnode parentRes = fr.getFacetResultNode(); assertEquals(8.0, parentRes.getValue(), Double.MIN_VALUE); assertEquals(0.0, parentRes.getResidue(), Double.MIN_VALUE); assertEquals(0, parentRes.getNumSubResults()); - hasDoctor |= "Doctor".equals(fr.getFacetRequest().getCategoryPath().getComponent(0)); + hasDoctor |= "Doctor".equals(fr.getFacetRequest().getCategoryPath().components[0]); // doctor, depth=1, K=2 assertFalse("Shouldn't have found anything for a FacetRequest " + Index: lucene/facet/src/test/org/apache/lucene/facet/search/params/MultiIteratorsPerCLParamsTest.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/search/params/MultiIteratorsPerCLParamsTest.java (revision 1429478) +++ lucene/facet/src/test/org/apache/lucene/facet/search/params/MultiIteratorsPerCLParamsTest.java (working copy) @@ -231,7 +231,7 @@ if (requestedPath == null) { parentOrdinal = 0; } else { - CategoryPath cp = new CategoryPath(requestedPath.getComponent(0)); + CategoryPath cp = new CategoryPath(requestedPath.components[0]); parentOrdinal = taxo.getOrdinal(cp); } parentArray = taxo.getParallelTaxonomyArrays().parents(); Index: lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestCategoryPath.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestCategoryPath.java (revision 1429478) +++ lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestCategoryPath.java (working copy) @@ -1,16 +1,8 @@ package org.apache.lucene.facet.taxonomy; -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.InputStreamReader; -import java.io.OutputStreamWriter; - +import org.apache.lucene.util.LuceneTestCase; import org.junit.Test; -import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.facet.taxonomy.CategoryPath; - /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -32,843 +24,145 @@ @Test public void testBasic() { - CategoryPath p = new CategoryPath(0,0); - assertEquals(0, p.length()); - for (int i=0; i<1000; i++) { - p.add("hello"); - assertEquals(i+1, p.length()); - } + assertEquals(0, CategoryPath.EMPTY.length); + assertEquals(1, new CategoryPath("hello").length); + assertEquals(2, new CategoryPath("hello", "world").length); } @Test - public void testConstructorCapacity() { - CategoryPath p = new CategoryPath(0,0); - assertEquals(0, p.capacityChars()); - assertEquals(0, p.capacityComponents()); - assertEquals(0, p.length()); - p = new CategoryPath(5,18); - assertEquals(5, p.capacityChars()); - assertEquals(18, p.capacityComponents()); - assertEquals(0, p.length()); - p = new CategoryPath(27,13); - assertEquals(27, p.capacityChars()); - assertEquals(13, p.capacityComponents()); - assertEquals(0, p.length()); - } - - @Test - public void testClear() { - CategoryPath p = new CategoryPath(0,0); - p.add("hi"); - p.add("there"); - assertEquals(2, p.length()); - p.clear(); - assertEquals(0, p.length()); - p.add("yo!"); - assertEquals(1, p.length()); - } - - @Test - public void testTrim() { - CategoryPath p = new CategoryPath(0,0); - p.add("this"); - p.add("message"); - p.add("will"); - p.add("self"); - p.add("destruct"); - p.add("in"); - p.add("five"); - p.add("seconds"); - assertEquals(8, p.length()); - p.trim(3); - assertEquals(5, p.length()); - p.trim(0); // no-op - assertEquals(5, p.length()); - p.trim(-3); // no-op - assertEquals(5, p.length()); - p.trim(1); - assertEquals(4, p.length()); - p.trim(8); // clear - assertEquals(0, p.length()); - p.add("yo!"); - assertEquals(1, p.length()); - p.trim(1); // clear - assertEquals(0, p.length()); - } - - @Test - public void testComponentsLimit() { - // Test that we can add up to 2^15-1 components - CategoryPath p = new CategoryPath(0,0); - for (int i=0; i<32767; i++) { - p.add(""); - assertEquals(i+1, p.length()); - } - // Also see that in the current implementation, this is actually - // the limit: if we add one more component, things break (because - // we used a short to hold ncomponents). See that it breaks in the - // way we expect it to: - p.add(""); // this still works, but... - assertEquals(-32768, p.length()); // now the length is wrong and negative - } - - @Test - public void testCharsLimit() { - // Test that we can add up to 2^15-1 characters - CategoryPath p = new CategoryPath(0,0); - for (int i=0; i<8192; i++) { - p.add("aaaa"); - } - // Also see that in the current implementation, this is actually the - // limit: If we add one more character, things break (because ends[] - // is an array of shorts), and we actually get an exception. - try { - p.add("a"); - fail("Should have thrown an exception"); - } catch (ArrayIndexOutOfBoundsException e) { - // good. - } - } - - @Test public void testToString() { - CategoryPath p = new CategoryPath(0,0); // When the category is empty, we expect an empty string - assertEquals("", p.toString('/')); - // This is (deliberately, in our implementation) indistinguishable - // from the case of a single empty component: - p.add(""); - assertEquals("", p.toString('/')); - // Check just one category (so no delimiter needed): - p.clear(); - p.add("hello"); - assertEquals("hello", p.toString('/')); - // Now for two categories: - p.clear(); - p.add("hello"); - p.add("world"); - assertEquals("hello/world", p.toString('/')); - // And for a thousand... - p.clear(); - p.add("0"); - StringBuilder expected = new StringBuilder("0"); - for (int i=1; i<1000; i++) { - String num = Integer.toString(i); - p.add(num); - expected.append('/'); - expected.append(num); - } - assertEquals(expected.toString(), p.toString('/')); - // Check that toString() without a parameter just defaults to '/': - assertEquals(expected.toString(), p.toString()); + assertEquals("", CategoryPath.EMPTY.toString('/')); + // one category (so no delimiter needed) + assertEquals("hello", new CategoryPath("hello").toString('/')); + // more than one category (so no delimiter needed) + assertEquals("hello/world", new CategoryPath("hello", "world").toString('/')); } - // testing toString() and its variants already test most of the appendTo() - // code, but not all of it (the "eclemma" code-coverage tool discovered - // this for us). Here we complete the coverage of the appendTo() methods: @Test - public void testAppendTo() throws IOException { - CategoryPath p = new CategoryPath(0,0); - StringBuilder sb = new StringBuilder(); - p.appendTo(sb, '/'); - assertEquals(0, sb.length()); - p.appendTo(sb, '/', -1); - assertEquals(0, sb.length()); - p.appendTo(sb, '/', 1); - assertEquals(0, sb.length()); - p.appendTo(sb, '/', -1, 1); - assertEquals(0, sb.length()); - } - - @Test - public void testLastComponent() { - CategoryPath p = new CategoryPath(1000,1000); - // When the category is empty, we expect a null - assertNull(p.lastComponent()); - for (int i=0; i<=100; i++) { - String num = Integer.toString(i); - p.add(num); - assertEquals(num, p.lastComponent()); - } - } - - @Test public void testGetComponent() { - CategoryPath p = new CategoryPath(1000,1000); - // When the category is empty, we expect a null - assertNull(p.getComponent(0)); - assertNull(p.getComponent(1)); - assertNull(p.getComponent(-1)); - for (int i=0; i<=100; i++) { - p.add(Integer.toString(i)); - for (int j=0; j<=i; j++) { - assertEquals(j, Integer.parseInt(p.getComponent(j))); - } - assertNull(p.getComponent(-1)); - assertNull(p.getComponent(i+1)); + String[] components = new String[atLeast(10)]; + for (int i = 0; i < components.length; i++) { + components[i] = Integer.toString(i); } + CategoryPath cp = new CategoryPath(components); + for (int i = 0; i < components.length; i++) { + assertEquals(i, Integer.parseInt(cp.components[i])); + } } - @Test - public void testToStringPrefix() { - CategoryPath p = new CategoryPath(0,0); - p.add("hi"); - p.add("there"); - p.add("man"); - assertEquals("hi/there/man", p.toString('/')); - assertEquals("", p.toString('/', 0)); - assertEquals("hi", p.toString('/', 1)); - assertEquals("hi/there", p.toString('/', 2)); - assertEquals("hi/there/man", p.toString('/', 3)); - assertEquals("hi/there/man", p.toString('/', 4)); - assertEquals("hi/there/man", p.toString('/', -1)); - } - - @Test - public void testToStringSubpath() { - CategoryPath p = new CategoryPath(0,0); - assertEquals("", p.toString('/', 0, 0)); - p.add("hi"); - p.add("there"); - p.add("man"); - assertEquals("", p.toString('/', 0, 0)); - assertEquals("hi", p.toString('/', 0, 1)); - assertEquals("hi/there", p.toString('/', 0, 2)); - assertEquals("hi/there/man", p.toString('/', 0, 3)); - assertEquals("hi/there/man", p.toString('/', 0, 4)); - assertEquals("hi/there/man", p.toString('/', 0, -1)); - assertEquals("hi/there/man", p.toString('/', -1, -1)); - assertEquals("there/man", p.toString('/', 1, -1)); - assertEquals("man", p.toString('/', 2, -1)); - assertEquals("", p.toString('/', 3, -1)); - assertEquals("there/man", p.toString('/', 1, 3)); - assertEquals("there", p.toString('/', 1, 2)); - assertEquals("", p.toString('/', 1, 1)); - } - - @Test + @Test public void testDelimiterConstructor() { - // Test that the constructor that takes a string and a delimiter - // works correctly. Also check that it allocates exactly the needed - // needed size for the array - not more. CategoryPath p = new CategoryPath("", '/'); - assertEquals(p.length(), 0); - assertEquals(p.capacityChars(), 0); - assertEquals(p.capacityComponents(), 0); + assertEquals(0, p.length); p = new CategoryPath("hello", '/'); - assertEquals(p.length(), 1); - assertEquals(p.capacityChars(), 5); - assertEquals(p.capacityComponents(), 1); + assertEquals(p.length, 1); assertEquals(p.toString('@'), "hello"); p = new CategoryPath("hi/there", '/'); - assertEquals(p.length(), 2); - assertEquals(p.capacityChars(), 7); - assertEquals(p.capacityComponents(), 2); + assertEquals(p.length, 2); assertEquals(p.toString('@'), "hi@there"); p = new CategoryPath("how/are/you/doing?", '/'); - assertEquals(p.length(), 4); - assertEquals(p.capacityChars(), 15); - assertEquals(p.capacityComponents(), 4); + assertEquals(p.length, 4); assertEquals(p.toString('@'), "how@are@you@doing?"); } - @Test + @Test public void testDefaultConstructor() { // test that the default constructor (no parameters) currently // defaults to creating an object with a 0 initial capacity. // If we change this default later, we also need to change this // test. - CategoryPath p = new CategoryPath(); - assertEquals(0, p.capacityChars()); - assertEquals(0, p.capacityComponents()); - assertEquals(0, p.length()); + CategoryPath p = CategoryPath.EMPTY; + assertEquals(0, p.length); assertEquals("", p.toString('/')); } @Test - public void testAddEmpty() { - // In the current implementation, p.add("") should add en empty - // component (which is, admitingly, not a useful case. On the other - // hand, p.add("", delimiter) should add no components at all. - // Verify this: - CategoryPath p = new CategoryPath(0, 0); - p.add(""); - assertEquals(1, p.length()); - p.add(""); - assertEquals(2, p.length()); - p.add("", '/'); - assertEquals(2, p.length()); - p.clear(); - p.add("", '/'); - assertEquals(0, p.length()); - } - - @Test - public void testDelimiterAdd() { - // Test that the add() that takes a string and a delimiter - // works correctly. Note that unlike the constructor test above, - // we can't expect the capacity to grow to exactly the length of - // the given category, so we do not test this. - CategoryPath p = new CategoryPath(0, 0); - p.add("", '/'); - assertEquals(0, p.length()); - assertEquals("", p.toString('@'), ""); - p.clear(); - p.add("hello", '/'); - assertEquals(p.length(), 1); - assertEquals(p.toString('@'), "hello"); - p.clear(); - p.add("hi/there", '/'); - assertEquals(p.length(), 2); - assertEquals(p.toString('@'), "hi@there"); - p.clear(); - p.add("how/are/you/doing?", '/'); - assertEquals(p.length(), 4); - assertEquals(p.toString('@'), "how@are@you@doing?"); - // See that this is really an add, not replace: - p.clear(); - p.add("hi/there", '/'); - assertEquals(p.length(), 2); - assertEquals(p.toString('@'), "hi@there"); - p.add("how/are/you/doing", '/'); - assertEquals(p.length(), 6); - assertEquals(p.toString('@'), "hi@there@how@are@you@doing"); - } - - @Test - public void testCopyConstructor() { - CategoryPath p = new CategoryPath(0,0); - int expectedchars=0; - for (int i=0; i<1000; i++) { - CategoryPath clone = new CategoryPath(p); - assertEquals(p.length(), clone.length()); - assertEquals(p.toString('/'), clone.toString('/')); - // verify that the newly created clone has exactly the right - // capacity, with no spare (while the original path p probably - // does have spare) - assertEquals(i, clone.capacityComponents()); - assertEquals(expectedchars, clone.capacityChars()); - // Finally, add another component to the path, for the next - // round of this loop - String num = Integer.toString(i); - p.add(num); - expectedchars+=num.length(); - } - } - - @Test - public void testPrefixCopyConstructor() { - CategoryPath p = new CategoryPath(0,0); - p.add("hi"); - p.add("there"); - p.add("man"); - assertEquals(p.length(), 3); + public void testSubPath() { + final CategoryPath p = new CategoryPath("hi", "there", "man"); + assertEquals(p.length, 3); - CategoryPath p1 = new CategoryPath(p,2); - assertEquals(2, p1.length()); + CategoryPath p1 = p.subpath(2); + assertEquals(2, p1.length); assertEquals("hi/there", p1.toString('/')); - // the new prefix object should only take the space it needs: - assertEquals(2, p1.capacityComponents()); - assertEquals(7, p1.capacityChars()); - p1 = new CategoryPath(p,1); - assertEquals(1, p1.length()); + p1 = p.subpath(1); + assertEquals(1, p1.length); assertEquals("hi", p1.toString('/')); - assertEquals(1, p1.capacityComponents()); - assertEquals(2, p1.capacityChars()); - p1 = new CategoryPath(p,0); - assertEquals(0, p1.length()); + p1 = p.subpath(0); + assertEquals(0, p1.length); assertEquals("", p1.toString('/')); - assertEquals(0, p1.capacityComponents()); - assertEquals(0, p1.capacityChars()); - // with all the following lengths, the prefix should be the whole path: + // with all the following lengths, the prefix should be the whole path int[] lengths = { 3, -1, 4 }; - for (int i=0; i0) { - expectedCharsNeeded++; - } - assertEquals(expectedCharsNeeded, p.charsNeededForFullPath()); + for (String comp : components) { + expectedCharsNeeded += comp.length(); } + expectedCharsNeeded += cp.length - 1; // delimiter chars + assertEquals(expectedCharsNeeded, cp.fullPathLength()); } @Test public void testCopyToCharArray() { - String[] components = { "hello", "world", "yo" }; - CategoryPath p = new CategoryPath(components); - char[] charArray = new char[p.charsNeededForFullPath()]; - int numCharsCopied = 0; - - numCharsCopied = p.copyToCharArray(charArray, 0, 0, '.'); - assertEquals(0, numCharsCopied); - assertEquals("", new String(charArray, 0, numCharsCopied)); - - numCharsCopied = p.copyToCharArray(charArray, 0, 1, '.'); - assertEquals(5, numCharsCopied); - assertEquals("hello", new String(charArray, 0, numCharsCopied)); - - numCharsCopied = p.copyToCharArray(charArray, 0, 3, '.'); - assertEquals(14, numCharsCopied); + CategoryPath p = new CategoryPath("hello", "world", "yo"); + char[] charArray = new char[p.fullPathLength()]; + int numCharsCopied = p.copyFullPath(charArray, 0, '.'); + assertEquals(p.fullPathLength(), numCharsCopied); assertEquals("hello.world.yo", new String(charArray, 0, numCharsCopied)); - - numCharsCopied = p.copyToCharArray(charArray, 0, -1, '.'); - assertEquals(14, numCharsCopied); - assertEquals("hello.world.yo", new String(charArray, 0, numCharsCopied)); - numCharsCopied = p.copyToCharArray(charArray, 0, 4, '.'); - assertEquals(14, numCharsCopied); - assertEquals("hello.world.yo", new String(charArray, 0, numCharsCopied)); } @Test - public void testCharSerialization() throws Exception { - CategoryPath[] testCategories = { - new CategoryPath("hi", "there", "man"), - new CategoryPath("hello"), - new CategoryPath("what's", "up"), - // See that an empty category, which generates a (char)0, - // doesn't cause any problems in the middle of the serialization: - new CategoryPath(), - new CategoryPath("another", "example"), - new CategoryPath(), - new CategoryPath() - }; - StringBuilder sb = new StringBuilder(); - for (int i=0; i"; } - if (path.length()==0) { + if (path.length==0) { return ""; } return "<"+path.toString('/')+">"; @@ -304,9 +304,9 @@ tw.close(); TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir); assertEquals(1, tr.getSize()); - assertEquals(0, tr.getPath(0).length()); + assertEquals(0, tr.getPath(0).length); assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getParent(0)); - assertEquals(0, tr.getOrdinal(new CategoryPath())); + assertEquals(0, tr.getOrdinal(CategoryPath.EMPTY)); tr.close(); indexDir.close(); } @@ -323,9 +323,9 @@ tw.commit(); TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir); assertEquals(1, tr.getSize()); - assertEquals(0, tr.getPath(0).length()); + assertEquals(0, tr.getPath(0).length); assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getParent(0)); - assertEquals(0, tr.getOrdinal(new CategoryPath())); + assertEquals(0, tr.getOrdinal(CategoryPath.EMPTY)); tw.close(); tr.close(); indexDir.close(); @@ -416,7 +416,7 @@ ", but this is not a valid category."); } // verify that the parent is indeed my parent, according to the strings - if (!new CategoryPath(me, me.length()-1).equals(parent)) { + if (!me.subpath(me.length-1).equals(parent)) { fail("Got parent "+parentOrdinal+" for ordinal "+ordinal+ " but categories are "+showcat(parent)+" and "+showcat(me)+ " respectively."); @@ -506,7 +506,7 @@ } // verify that the parent is indeed my parent, according to the // strings - if (!new CategoryPath(me, me.length() - 1).equals(parent)) { + if (!me.subpath(me.length - 1).equals(parent)) { fail("Got parent " + parentOrdinal + " for ordinal " + ordinal + " but categories are " + showcat(parent) + " and " + showcat(me) + " respectively."); Index: lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestAddTaxonomy.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestAddTaxonomy.java (revision 1429478) +++ lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestAddTaxonomy.java (working copy) @@ -81,7 +81,6 @@ } private void validate(Directory dest, Directory src, OrdinalMap ordMap) throws Exception { - CategoryPath cp = new CategoryPath(); DirectoryTaxonomyReader destTR = new DirectoryTaxonomyReader(dest); try { final int destSize = destTR.getSize(); @@ -98,7 +97,7 @@ // validate that all source categories exist in destination, and their // ordinals are as expected. for (int j = 1; j < srcSize; j++) { - srcTR.getPath(j, cp); + CategoryPath cp = srcTR.getPath(j); int destOrdinal = destTR.getOrdinal(cp); assertTrue(cp + " not found in destination", destOrdinal > 0); assertEquals(destOrdinal, map[j]); Index: lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestConcurrentFacetedIndexing.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestConcurrentFacetedIndexing.java (revision 1429478) +++ lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestConcurrentFacetedIndexing.java (working copy) @@ -48,12 +48,8 @@ @Override public int get(CategoryPath categoryPath) { return -1; } @Override - public int get(CategoryPath categoryPath, int length) { return -1; } - @Override public boolean put(CategoryPath categoryPath, int ordinal) { return true; } @Override - public boolean put(CategoryPath categoryPath, int prefixLen, int ordinal) { return true; } - @Override public boolean isFull() { return true; } @Override public void clear() {} @@ -108,9 +104,9 @@ CategoryPath cp = newCategory(); cats.add(cp); // add all prefixes to values - int level = cp.length(); + int level = cp.length; while (level > 0) { - String s = cp.toString('/', level); + String s = cp.subpath(level).toString('/'); values.put(s, s); --level; } @@ -134,11 +130,11 @@ for (String cat : values.keySet()) { CategoryPath cp = new CategoryPath(cat, '/'); assertTrue("category not found " + cp, tr.getOrdinal(cp) > 0); - int level = cp.length(); + int level = cp.length; int parentOrd = 0; // for root, parent is always virtual ROOT (ord=0) - CategoryPath path = new CategoryPath(); + CategoryPath path = CategoryPath.EMPTY; for (int i = 0; i < level; i++) { - path.add(cp.getComponent(i)); + path = cp.subpath(i + 1); int ord = tr.getOrdinal(path); assertEquals("invalid parent for cp=" + path, parentOrd, parents[ord]); parentOrd = ord; // next level should have this parent Index: lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyReader.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyReader.java (revision 1429478) +++ lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyReader.java (working copy) @@ -154,8 +154,8 @@ for (int i=0; i 0); - int level = cp.length(); + int level = cp.length; int parentOrd = 0; // for root, parent is always virtual ROOT (ord=0) - CategoryPath path = new CategoryPath(); + CategoryPath path = CategoryPath.EMPTY; for (int i = 0; i < level; i++) { - path.add(cp.getComponent(i)); + path = cp.subpath(i + 1); int ord = dtr.getOrdinal(path); assertEquals("invalid parent for cp=" + path, parentOrd, parents[ord]); parentOrd = ord; // next level should have this parent Index: lucene/facet/src/test/org/apache/lucene/facet/taxonomy/writercache/cl2o/TestCompactLabelToOrdinal.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/taxonomy/writercache/cl2o/TestCompactLabelToOrdinal.java (revision 1429478) +++ lucene/facet/src/test/org/apache/lucene/facet/taxonomy/writercache/cl2o/TestCompactLabelToOrdinal.java (working copy) @@ -6,11 +6,13 @@ import java.nio.charset.CodingErrorAction; import java.util.HashMap; import java.util.Map; +import java.util.Random; import org.junit.Test; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; import org.apache.lucene.facet.taxonomy.CategoryPath; import org.apache.lucene.facet.taxonomy.writercache.cl2o.CompactLabelToOrdinal; import org.apache.lucene.facet.taxonomy.writercache.cl2o.LabelToOrdinal; @@ -46,9 +48,10 @@ String[] uniqueValues = new String[numUniqueValues]; byte[] buffer = new byte[50]; + Random random = random(); for (int i = 0; i < numUniqueValues;) { - random().nextBytes(buffer); - int size = 1 + random().nextInt(50); + random.nextBytes(buffer); + int size = 1 + random.nextInt(buffer.length); // This test is turning random bytes into a string, // this is asking for trouble. @@ -56,16 +59,16 @@ .onUnmappableCharacter(CodingErrorAction.REPLACE) .onMalformedInput(CodingErrorAction.REPLACE); uniqueValues[i] = decoder.decode(ByteBuffer.wrap(buffer, 0, size)).toString(); - if (uniqueValues[i].indexOf(CompactLabelToOrdinal.TerminatorChar) == -1) { + if (uniqueValues[i].indexOf(CompactLabelToOrdinal.TERMINATOR_CHAR) == -1) { i++; } } - TEMP_DIR.mkdirs(); - File f = new File(TEMP_DIR, "CompactLabelToOrdinalTest.tmp"); + File tmpDir = _TestUtil.getTempDir("testLableToOrdinal"); + File f = new File(tmpDir, "CompactLabelToOrdinalTest.tmp"); int flushInterval = 10; - for (int i = 0; i < n * 10; i++) { + for (int i = 0; i < n; i++) { if (i > 0 && i % flushInterval == 0) { compact.flush(f); compact = CompactLabelToOrdinal.open(f, 0.15f, 3); @@ -75,19 +78,16 @@ } } - int index = random().nextInt(numUniqueValues); + int index = random.nextInt(numUniqueValues); CategoryPath label = new CategoryPath(uniqueValues[index], '/'); int ord1 = map.getOrdinal(label); int ord2 = compact.getOrdinal(label); - //System.err.println(ord1+" "+ord2); - assertEquals(ord1, ord2); - if (ord1 == LabelToOrdinal.InvalidOrdinal) { + if (ord1 == LabelToOrdinal.INVALID_ORDINAL) { ord1 = compact.getNextOrdinal(); - map.addLabel(label, ord1); compact.addLabel(label, ord1); } @@ -108,25 +108,15 @@ @Override public void addLabel(CategoryPath label, int ordinal) { - map.put(new CategoryPath(label), ordinal); + map.put(label, ordinal); } @Override - public void addLabel(CategoryPath label, int prefixLen, int ordinal) { - map.put(new CategoryPath(label, prefixLen), ordinal); - } - - @Override public int getOrdinal(CategoryPath label) { Integer value = map.get(label); - return (value != null) ? value.intValue() : LabelToOrdinal.InvalidOrdinal; + return (value != null) ? value.intValue() : LabelToOrdinal.INVALID_ORDINAL; } - @Override - public int getOrdinal(CategoryPath label, int prefixLen) { - Integer value = map.get(new CategoryPath(label, prefixLen)); - return (value != null) ? value.intValue() : LabelToOrdinal.InvalidOrdinal; - } + } - } }