Index: lucene/CHANGES.txt =================================================================== --- lucene/CHANGES.txt (revision 1428749) +++ lucene/CHANGES.txt (working copy) @@ -103,6 +103,8 @@ implementations. NOTE: indexes that contain category enhancements/associations are not supported by the new code and should be recreated. (Shai Erera) + +* LUCENE-4659: Massive cleanup to CategoryPath API. (Shai Erera) New Features Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/RandomFacetSource.java =================================================================== --- lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/RandomFacetSource.java (revision 1428749) +++ lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/RandomFacetSource.java (working copy) @@ -53,13 +53,14 @@ facets.clear(); } int numFacets = 1 + random.nextInt(maxDocFacets-1); // at least one facet to each doc - for (int i=0; i { +public class CategoryPath implements Cloneable, Comparable { // A category path is a sequence of string components. It is kept // internally as one large character array "chars" with all the string @@ -66,66 +63,21 @@ */ public void trim(int nTrim) { if (nTrim >= this.ncomponents) { - clear(); + ncomponents = 0; } else if (nTrim > 0) { this.ncomponents -= nTrim; } } - /** - * Returns the current character capacity of the CategoryPath. The character - * capacity is the size of the internal buffer used to hold the characters - * of all the path's components. When a component is added and the capacity - * is not big enough, the buffer is automatically grown, and capacityChars() - * increases. - */ - public int capacityChars() { - return chars.length; - } - - /** - * Returns the current component capacity of the CategoryPath. The component - * capacity is the maximum number of components that the internal buffer can - * currently hold. When a component is added beyond this capacity, the - * buffer is automatically grown, and capacityComponents() increases. - */ - public int capacityComponents() { - return ends.length; - } - - /** - * Construct a new empty CategoryPath object. CategoryPath objects are meant - * to be reused, by add()ing components, and later clear()ing, and add()ing - * components again. The CategoryPath object is created with a buffer - * pre-allocated for a given number of characters and components, but the - * buffer will grow as necessary (see {@link #capacityChars()} and - * {@link #capacityComponents()}). - */ - public CategoryPath(int capacityChars, int capacityComponents) { + /** Create an empty CategoryPath object. */ + public CategoryPath() { ncomponents = 0; - chars = new char[capacityChars]; - ends = new short[capacityComponents]; + chars = new char[0]; + ends = new short[0]; } - /** - * Create an empty CategoryPath object. Equivalent to the constructor - * {@link #CategoryPath(int, int)} with the two initial-capacity arguments - * set to zero. - */ - public CategoryPath() { - this(0, 0); - } - - /** - * Add the given component to the end of the path. - *

- * Note that when a String object is passed to this method, a reference to - * it is not saved (rather, its content is copied), which will lead to that - * String object being gc'ed. To reduce the number of garbage objects, you - * can pass a mutable CharBuffer instead of an immutable String to this - * method. - */ - public void add(CharSequence component) { + /** Add the given component to the end of the path. */ + public void add(String component) { // Set the new end, increasing the "ends" array sizes if necessary: if (ncomponents >= ends.length) { short[] newends = new short[(ends.length + 1) * 2]; @@ -143,24 +95,12 @@ System.arraycopy(chars, 0, newchars, 0, chars.length); chars = newchars; } - for (int i = 0; i < cmplen; i++) { - chars[prevend++] = component.charAt(i); - } + component.getChars(0, component.length(), chars, prevend); ncomponents++; } /** - * Empty the CategoryPath object, so that it has zero components. The - * capacity of the object (see {@link #capacityChars()} and - * {@link #capacityComponents()}) is not reduced, so that the object can be - * reused without frequent reallocations. - */ - public void clear() { - ncomponents = 0; - } - - /** * Build a string representation of the path, with its components separated * by the given delimiter character. The resulting string is appended to a * given Appendable, e.g., a StringBuilder, CharBuffer or Writer. @@ -276,7 +216,7 @@ /** * This method, an implementation of the {@link Object#toString()} * interface, is to allow simple printing of a CategoryPath, for debugging - * purposes. When possible, it recommended to avoid using it it, and rather, + * purposes. When possible, it recommended to avoid using it, and rather, * if you want to output the path with its components separated by a * delimiter character, specify the delimiter explicitly, with * {@link #toString(char)}. @@ -312,39 +252,6 @@ } /** - * like {@link #toString(char)}, but takes only a part of the path, rather - * than the whole path. - *

- * start specifies the first component in the subpath, and - * end is one past the last component. If start is - * negative, 0 is assumed, and if end is negative or past the - * end of the path, the path is taken until the end. Otherwise, if - * end<=start, an empty string is returned. An emptry string is - * returned also in the case that the path is empty. - */ - public String toString(char delimiter, int start, int end) { - if (start < 0) { - start = 0; - } - if (end < 0 || end > ncomponents) { - end = ncomponents; - } - if (end <= start) { - return ""; - } - int startchar = (start == 0) ? 0 : ends[start - 1]; - StringBuilder sb = new StringBuilder(ends[end - 1] - startchar - + (end - start) - 1); - try { - this.appendTo(sb, delimiter, start, end); - } catch (IOException e) { - // can't happen, because sb.append() never actually throws an - // exception - } - return sb.toString(); - } - - /** * Return the i'th component of the path, in a new String object. If there * is no i'th component, a null is returned. */ @@ -359,21 +266,6 @@ } /** - * Return the last component of the path, in a new String object. If the - * path is empty, a null is returned. - */ - public String lastComponent() { - if (ncomponents == 0) { - return null; - } - if (ncomponents == 1) { - return new String(chars, 0, ends[0]); - } - return new String(chars, ends[ncomponents - 2], ends[ncomponents - 1] - - ends[ncomponents - 2]); - } - - /** * Copies the specified number of components from this category path to the * specified character array, with the components separated by a given * delimiter character. The array must be large enough to hold the @@ -458,8 +350,7 @@ // To do this, we unfortunately need to make an additional pass on the // given string: int nparts = 1; - for (int i = pathString.indexOf(delimiter); i >= 0; i = pathString - .indexOf(delimiter, i + 1)) { + for (int i = pathString.indexOf(delimiter); i >= 0; i = pathString.indexOf(delimiter, i + 1)) { nparts++; } @@ -467,23 +358,6 @@ chars = new char[pathString.length() - nparts + 1]; ncomponents = 0; - add(pathString, delimiter); - } - - /** - * Add the given components to the end of the path. The components are given - * in a single string, separated by a given delimiter character. If the - * given string is empty, it is assumed to refer to the root (empty) - * category, and nothing is added to the path (rather than adding a single - * empty component). - *

- * Note that when a String object is passed to this method, a reference to - * it is not saved (rather, its content is copied), which will lead to that - * String object being gc'ed. To reduce the number of garbage objects, you - * can pass a mutable CharBuffer instead of an immutable String to this - * method. - */ - public void add(CharSequence pathString, char delimiter) { int len = pathString.length(); if (len == 0) { return; // assume root category meant, so add nothing. @@ -520,73 +394,24 @@ /** * Construct a new CategoryPath object, copying an existing path given as an * array of strings. - *

- * The new object occupies exactly the space it needs, without any spare - * capacity. This is the expected behavior in the typical use case, where - * this constructor is used to create a temporary object which is never - * reused. */ - public CategoryPath(CharSequence... components) { - this.ncomponents = (short) components.length; - this.ends = new short[ncomponents]; + public CategoryPath(String... components) { + ncomponents = (short) components.length; + ends = new short[ncomponents]; if (ncomponents > 0) { - this.ends[0] = (short) components[0].length(); + ends[0] = (short) components[0].length(); for (int i = 1; i < ncomponents; i++) { - this.ends[i] = (short) (this.ends[i - 1] + components[i] - .length()); + ends[i] = (short) (ends[i - 1] + components[i].length()); } - this.chars = new char[this.ends[ncomponents - 1]]; - CharSequence cs = components[0]; - if (cs instanceof String) { - ((String) cs).getChars(0, cs.length(), this.chars, 0); - } else { - for (int j = 0, k = cs.length(); j < k; j++) { - this.chars[j] = cs.charAt(j); - } - } + chars = new char[ends[ncomponents - 1]]; + components[0].getChars(0, components[0].length(), chars, 0); for (int i = 1; i < ncomponents; i++) { - cs = components[i]; - int offset = this.ends[i - 1]; - if (cs instanceof String) { - ((String) cs).getChars(0, cs.length(), this.chars, offset); - } else { - for (int j = 0, k = cs.length(); j < k; j++) { - this.chars[j + offset] = cs.charAt(j); - } - } + int offset = ends[i - 1]; + components[i].getChars(0, components[i].length(), chars, offset); } } else { - this.chars = new char[0]; - } - } - - /** - * Construct a new CategoryPath object, copying the path given in an - * existing CategoryPath object. - *

- * This copy-constructor is handy when you need to save a reference to a - * CategoryPath (e.g., when it serves as a key to a hash-table), but cannot - * save a reference to the original object because its contents can be - * changed later by the user. Copying the contents into a new object is a - * solution. - *

- * This constructor does not copy the capacity (spare buffer size) - * of the existing CategoryPath. Rather, the new object occupies exactly the - * space it needs, without any spare. This is the expected behavior in the - * typical use case outlined in the previous paragraph. - */ - public CategoryPath(CategoryPath existing) { - ncomponents = existing.ncomponents; - if (ncomponents == 0) { chars = new char[0]; - ends = new short[0]; - return; } - - chars = new char[existing.ends[ncomponents - 1]]; - System.arraycopy(existing.chars, 0, chars, 0, chars.length); - ends = new short[ncomponents]; - System.arraycopy(existing.ends, 0, ends, 0, ends.length); } /** @@ -623,7 +448,18 @@ @Override public CategoryPath clone() { - return new CategoryPath(this); + CategoryPath clone = new CategoryPath(); + clone.ncomponents = ncomponents; + if (ncomponents == 0) { + clone.chars = new char[0]; + clone.ends = new short[0]; + } else { + clone.chars = new char[ends[ncomponents - 1]]; + System.arraycopy(chars, 0, clone.chars, 0, clone.chars.length); + clone.ends = new short[ncomponents]; + System.arraycopy(ends, 0, clone.ends, 0, clone.ends.length); + } + return clone; } /** @@ -663,28 +499,6 @@ } /** - * Test whether this object is a descendant of another CategoryPath. This is - * true if the other CategoryPath is the prefix of this. - */ - public boolean isDescendantOf(CategoryPath other) { - if (this.ncomponents < other.ncomponents) { - return false; - } - int j = 0; - for (int i = 0; i < other.ncomponents; i++) { - if (ends[i] != other.ends[i]) { - return false; - } - for (; j < ends[i]; j++) { - if (this.chars[j] != other.chars[j]) { - return false; - } - } - } - return true; - } - - /** * Calculate a hashCode for this path, used when a CategoryPath serves as a * hash-table key. If two objects are equal(), their hashCodes need to be * equal, so like in equal(), hashCode does not consider unused portions of @@ -954,71 +768,6 @@ } /** - * Serializes the content of this CategoryPath to a byte stream, using UTF-8 - * encoding to convert characters to bytes, and treating the ends as 16-bit - * characters. - * - * @param osw - * The output byte stream. - * @throws IOException - * If there are encoding errors. - */ - // TODO (Facet): consolidate all de/serialize method names to - // serialize() and unserialize() - public void serializeToStreamWriter(OutputStreamWriter osw) - throws IOException { - osw.write(this.ncomponents); - if (this.ncomponents <= 0) { - return; - } - for (int j = 0; j < this.ncomponents; j++) { - osw.write(this.ends[j]); - } - osw.write(this.chars, 0, this.ends[this.ncomponents - 1]); - } - - /** - * Serializes the content of this CategoryPath to a byte stream, using UTF-8 - * encoding to convert characters to bytes, and treating the ends as 16-bit - * characters. - * - * @param isr - * The input stream. - * @throws IOException - * If there are encoding errors. - */ - public void deserializeFromStreamReader(InputStreamReader isr) - throws IOException { - this.ncomponents = (short) isr.read(); - if (this.ncomponents <= 0) { - return; - } - if (this.ends == null || this.ends.length < this.ncomponents) { - this.ends = new short[this.ncomponents]; - } - for (int j = 0; j < this.ncomponents; j++) { - this.ends[j] = (short) isr.read(); - } - if (this.chars == null - || this.ends[this.ncomponents - 1] > chars.length) { - this.chars = new char[this.ends[this.ncomponents - 1]]; - } - isr.read(this.chars, 0, this.ends[this.ncomponents - 1]); - } - - private void writeObject(java.io.ObjectOutputStream out) - throws IOException { - OutputStreamWriter osw = new OutputStreamWriter(out, "UTF-8"); - this.serializeToStreamWriter(osw); - osw.flush(); - } - - private void readObject(java.io.ObjectInputStream in) throws IOException { - InputStreamReader isr = new InputStreamReader(in, "UTF-8"); - this.deserializeFromStreamReader(isr); - } - - /** * Compares this CategoryPath with the other CategoryPath for lexicographic * order. * Returns a negative integer, zero, or a positive integer as this @@ -1051,4 +800,5 @@ // one is a prefix of the other return this.length() - other.length(); } + } Index: lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyReader.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyReader.java (revision 1428749) +++ lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyReader.java (working copy) @@ -205,22 +205,9 @@ */ public abstract int getParent(int ordinal) throws IOException; - /** - * Returns the path name of the category with the given ordinal. The path is - * returned as a new CategoryPath object - to reuse an existing object, use - * {@link #getPath(int, CategoryPath)}. - * - * @return a {@link CategoryPath} with the required path, or {@code null} if - * the given ordinal is unknown to the taxonomy. - */ + /** Returns the path name of the category with the given ordinal. */ public abstract CategoryPath getPath(int ordinal) throws IOException; - /** - * Same as {@link #getPath(int)}, only reuses the given {@link CategoryPath} - * instances. - */ - public abstract boolean getPath(int ordinal, CategoryPath result) throws IOException; - /** Returns the current refCount for this taxonomy reader. */ public final int getRefCount() { return refCount.get(); Index: lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java (revision 1428749) +++ lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java (working copy) @@ -349,18 +349,6 @@ } @Override - public boolean getPath(int ordinal, CategoryPath result) throws IOException { - ensureOpen(); - String label = getLabel(ordinal); - if (label == null) { - return false; - } - result.clear(); - result.add(label, delimiter); - return true; - } - - @Override public int getSize() { ensureOpen(); return indexReader.numDocs(); Index: lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java (revision 1428749) +++ lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java (working copy) @@ -760,7 +760,6 @@ boolean aborted = false; DirectoryReader reader = readerManager.acquire(); try { - CategoryPath cp = new CategoryPath(); TermsEnum termsEnum = null; DocsEnum docsEnum = null; for (AtomicReaderContext ctx : reader.leaves()) { @@ -775,8 +774,7 @@ // hence documents), there are no deletions in the index. Therefore, it // is sufficient to call next(), and then doc(), exactly once with no // 'validation' checks. - cp.clear(); - cp.add(t.utf8ToString(), delimiter); + CategoryPath cp = new CategoryPath(t.utf8ToString(), delimiter); docsEnum = termsEnum.docs(null, docsEnum, DocsEnum.FLAG_NONE); boolean res = cache.put(cp, docsEnum.nextDoc() + ctx.docBase); assert !res : "entries should not have been evicted from the cache"; @@ -857,7 +855,6 @@ final int size = r.numDocs(); final OrdinalMap ordinalMap = map; ordinalMap.setSize(size); - CategoryPath cp = new CategoryPath(); int base = 0; TermsEnum te = null; DocsEnum docs = null; @@ -867,8 +864,7 @@ te = terms.iterator(te); while (te.next() != null) { String value = te.term().utf8ToString(); - cp.clear(); - cp.add(value, Consts.DEFAULT_DELIMITER); + CategoryPath cp = new CategoryPath(value, Consts.DEFAULT_DELIMITER); final int ordinal = addCategory(cp); docs = te.docs(null, docs, DocsEnum.FLAG_NONE); ordinalMap.addMapping(docs.nextDoc() + base, ordinal); Index: lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/lru/NameIntCacheLRU.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/lru/NameIntCacheLRU.java (revision 1428749) +++ lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/lru/NameIntCacheLRU.java (working copy) @@ -76,7 +76,7 @@ // CategoryPath object is mutable, so we cannot save a reference to an // existing CategoryPath. Subclasses which override this method can // avoid this cloning by, e.g., hashing the name. - return new CategoryPath(name); + return name.clone(); } Object key(CategoryPath name, int prefixLen) { Index: lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestCategoryPath.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestCategoryPath.java (revision 1428749) +++ lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestCategoryPath.java (working copy) @@ -1,16 +1,10 @@ package org.apache.lucene.facet.taxonomy; -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; import java.io.IOException; -import java.io.InputStreamReader; -import java.io.OutputStreamWriter; +import org.apache.lucene.util.LuceneTestCase; import org.junit.Test; -import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.facet.taxonomy.CategoryPath; - /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -32,53 +26,14 @@ @Test public void testBasic() { - CategoryPath p = new CategoryPath(0,0); - assertEquals(0, p.length()); - for (int i=0; i<1000; i++) { - p.add("hello"); - assertEquals(i+1, p.length()); - } + assertEquals(0, new CategoryPath().length()); + assertEquals(1, new CategoryPath("hello").length()); + assertEquals(2, new CategoryPath("hello", "world").length()); } @Test - public void testConstructorCapacity() { - CategoryPath p = new CategoryPath(0,0); - assertEquals(0, p.capacityChars()); - assertEquals(0, p.capacityComponents()); - assertEquals(0, p.length()); - p = new CategoryPath(5,18); - assertEquals(5, p.capacityChars()); - assertEquals(18, p.capacityComponents()); - assertEquals(0, p.length()); - p = new CategoryPath(27,13); - assertEquals(27, p.capacityChars()); - assertEquals(13, p.capacityComponents()); - assertEquals(0, p.length()); - } - - @Test - public void testClear() { - CategoryPath p = new CategoryPath(0,0); - p.add("hi"); - p.add("there"); - assertEquals(2, p.length()); - p.clear(); - assertEquals(0, p.length()); - p.add("yo!"); - assertEquals(1, p.length()); - } - - @Test public void testTrim() { - CategoryPath p = new CategoryPath(0,0); - p.add("this"); - p.add("message"); - p.add("will"); - p.add("self"); - p.add("destruct"); - p.add("in"); - p.add("five"); - p.add("seconds"); + CategoryPath p = new CategoryPath("this", "message", "will", "self", "destruct", "in", "five", "seconds"); assertEquals(8, p.length()); p.trim(3); assertEquals(5, p.length()); @@ -90,77 +45,16 @@ assertEquals(4, p.length()); p.trim(8); // clear assertEquals(0, p.length()); - p.add("yo!"); - assertEquals(1, p.length()); - p.trim(1); // clear - assertEquals(0, p.length()); } @Test - public void testComponentsLimit() { - // Test that we can add up to 2^15-1 components - CategoryPath p = new CategoryPath(0,0); - for (int i=0; i<32767; i++) { - p.add(""); - assertEquals(i+1, p.length()); - } - // Also see that in the current implementation, this is actually - // the limit: if we add one more component, things break (because - // we used a short to hold ncomponents). See that it breaks in the - // way we expect it to: - p.add(""); // this still works, but... - assertEquals(-32768, p.length()); // now the length is wrong and negative - } - - @Test - public void testCharsLimit() { - // Test that we can add up to 2^15-1 characters - CategoryPath p = new CategoryPath(0,0); - for (int i=0; i<8192; i++) { - p.add("aaaa"); - } - // Also see that in the current implementation, this is actually the - // limit: If we add one more character, things break (because ends[] - // is an array of shorts), and we actually get an exception. - try { - p.add("a"); - fail("Should have thrown an exception"); - } catch (ArrayIndexOutOfBoundsException e) { - // good. - } - } - - @Test public void testToString() { - CategoryPath p = new CategoryPath(0,0); // When the category is empty, we expect an empty string - assertEquals("", p.toString('/')); - // This is (deliberately, in our implementation) indistinguishable - // from the case of a single empty component: - p.add(""); - assertEquals("", p.toString('/')); - // Check just one category (so no delimiter needed): - p.clear(); - p.add("hello"); - assertEquals("hello", p.toString('/')); - // Now for two categories: - p.clear(); - p.add("hello"); - p.add("world"); - assertEquals("hello/world", p.toString('/')); - // And for a thousand... - p.clear(); - p.add("0"); - StringBuilder expected = new StringBuilder("0"); - for (int i=1; i<1000; i++) { - String num = Integer.toString(i); - p.add(num); - expected.append('/'); - expected.append(num); - } - assertEquals(expected.toString(), p.toString('/')); - // Check that toString() without a parameter just defaults to '/': - assertEquals(expected.toString(), p.toString()); + assertEquals("", new CategoryPath().toString('/')); + // one category (so no delimiter needed) + assertEquals("hello", new CategoryPath("hello").toString('/')); + // more than one category (so no delimiter needed) + assertEquals("hello/world", new CategoryPath("hello", "world").toString('/')); } // testing toString() and its variants already test most of the appendTo() @@ -168,7 +62,7 @@ // this for us). Here we complete the coverage of the appendTo() methods: @Test public void testAppendTo() throws IOException { - CategoryPath p = new CategoryPath(0,0); + CategoryPath p = new CategoryPath(); StringBuilder sb = new StringBuilder(); p.appendTo(sb, '/'); assertEquals(0, sb.length()); @@ -181,25 +75,13 @@ } @Test - public void testLastComponent() { - CategoryPath p = new CategoryPath(1000,1000); - // When the category is empty, we expect a null - assertNull(p.lastComponent()); - for (int i=0; i<=100; i++) { - String num = Integer.toString(i); - p.add(num); - assertEquals(num, p.lastComponent()); - } - } - - @Test public void testGetComponent() { - CategoryPath p = new CategoryPath(1000,1000); + CategoryPath p = new CategoryPath(); // When the category is empty, we expect a null assertNull(p.getComponent(0)); assertNull(p.getComponent(1)); assertNull(p.getComponent(-1)); - for (int i=0; i<=100; i++) { + for (int i = 0; i <= 100; i++) { p.add(Integer.toString(i)); for (int j=0; j<=i; j++) { assertEquals(j, Integer.parseInt(p.getComponent(j))); @@ -211,10 +93,7 @@ @Test public void testToStringPrefix() { - CategoryPath p = new CategoryPath(0,0); - p.add("hi"); - p.add("there"); - p.add("man"); + CategoryPath p = new CategoryPath("hi", "there", "man"); assertEquals("hi/there/man", p.toString('/')); assertEquals("", p.toString('/', 0)); assertEquals("hi", p.toString('/', 1)); @@ -225,50 +104,20 @@ } @Test - public void testToStringSubpath() { - CategoryPath p = new CategoryPath(0,0); - assertEquals("", p.toString('/', 0, 0)); - p.add("hi"); - p.add("there"); - p.add("man"); - assertEquals("", p.toString('/', 0, 0)); - assertEquals("hi", p.toString('/', 0, 1)); - assertEquals("hi/there", p.toString('/', 0, 2)); - assertEquals("hi/there/man", p.toString('/', 0, 3)); - assertEquals("hi/there/man", p.toString('/', 0, 4)); - assertEquals("hi/there/man", p.toString('/', 0, -1)); - assertEquals("hi/there/man", p.toString('/', -1, -1)); - assertEquals("there/man", p.toString('/', 1, -1)); - assertEquals("man", p.toString('/', 2, -1)); - assertEquals("", p.toString('/', 3, -1)); - assertEquals("there/man", p.toString('/', 1, 3)); - assertEquals("there", p.toString('/', 1, 2)); - assertEquals("", p.toString('/', 1, 1)); - } - - @Test public void testDelimiterConstructor() { // Test that the constructor that takes a string and a delimiter // works correctly. Also check that it allocates exactly the needed // needed size for the array - not more. CategoryPath p = new CategoryPath("", '/'); assertEquals(p.length(), 0); - assertEquals(p.capacityChars(), 0); - assertEquals(p.capacityComponents(), 0); p = new CategoryPath("hello", '/'); assertEquals(p.length(), 1); - assertEquals(p.capacityChars(), 5); - assertEquals(p.capacityComponents(), 1); assertEquals(p.toString('@'), "hello"); p = new CategoryPath("hi/there", '/'); assertEquals(p.length(), 2); - assertEquals(p.capacityChars(), 7); - assertEquals(p.capacityComponents(), 2); assertEquals(p.toString('@'), "hi@there"); p = new CategoryPath("how/are/you/doing?", '/'); assertEquals(p.length(), 4); - assertEquals(p.capacityChars(), 15); - assertEquals(p.capacityComponents(), 4); assertEquals(p.toString('@'), "how@are@you@doing?"); } @@ -279,109 +128,40 @@ // If we change this default later, we also need to change this // test. CategoryPath p = new CategoryPath(); - assertEquals(0, p.capacityChars()); - assertEquals(0, p.capacityComponents()); assertEquals(0, p.length()); assertEquals("", p.toString('/')); } @Test - public void testAddEmpty() { - // In the current implementation, p.add("") should add en empty - // component (which is, admitingly, not a useful case. On the other - // hand, p.add("", delimiter) should add no components at all. - // Verify this: - CategoryPath p = new CategoryPath(0, 0); - p.add(""); - assertEquals(1, p.length()); - p.add(""); - assertEquals(2, p.length()); - p.add("", '/'); - assertEquals(2, p.length()); - p.clear(); - p.add("", '/'); - assertEquals(0, p.length()); - } - - @Test - public void testDelimiterAdd() { - // Test that the add() that takes a string and a delimiter - // works correctly. Note that unlike the constructor test above, - // we can't expect the capacity to grow to exactly the length of - // the given category, so we do not test this. - CategoryPath p = new CategoryPath(0, 0); - p.add("", '/'); - assertEquals(0, p.length()); - assertEquals("", p.toString('@'), ""); - p.clear(); - p.add("hello", '/'); - assertEquals(p.length(), 1); - assertEquals(p.toString('@'), "hello"); - p.clear(); - p.add("hi/there", '/'); - assertEquals(p.length(), 2); - assertEquals(p.toString('@'), "hi@there"); - p.clear(); - p.add("how/are/you/doing?", '/'); - assertEquals(p.length(), 4); - assertEquals(p.toString('@'), "how@are@you@doing?"); - // See that this is really an add, not replace: - p.clear(); - p.add("hi/there", '/'); - assertEquals(p.length(), 2); - assertEquals(p.toString('@'), "hi@there"); - p.add("how/are/you/doing", '/'); - assertEquals(p.length(), 6); - assertEquals(p.toString('@'), "hi@there@how@are@you@doing"); - } - - @Test - public void testCopyConstructor() { - CategoryPath p = new CategoryPath(0,0); - int expectedchars=0; - for (int i=0; i<1000; i++) { - CategoryPath clone = new CategoryPath(p); + public void testClone() { + CategoryPath p = new CategoryPath(); + for (int i = 0; i < 1000; i++) { + CategoryPath clone = p.clone(); assertEquals(p.length(), clone.length()); assertEquals(p.toString('/'), clone.toString('/')); - // verify that the newly created clone has exactly the right - // capacity, with no spare (while the original path p probably - // does have spare) - assertEquals(i, clone.capacityComponents()); - assertEquals(expectedchars, clone.capacityChars()); // Finally, add another component to the path, for the next // round of this loop String num = Integer.toString(i); p.add(num); - expectedchars+=num.length(); } } @Test public void testPrefixCopyConstructor() { - CategoryPath p = new CategoryPath(0,0); - p.add("hi"); - p.add("there"); - p.add("man"); + CategoryPath p = new CategoryPath("hi", "there", "man"); assertEquals(p.length(), 3); CategoryPath p1 = new CategoryPath(p,2); assertEquals(2, p1.length()); assertEquals("hi/there", p1.toString('/')); - // the new prefix object should only take the space it needs: - assertEquals(2, p1.capacityComponents()); - assertEquals(7, p1.capacityChars()); p1 = new CategoryPath(p,1); assertEquals(1, p1.length()); assertEquals("hi", p1.toString('/')); - assertEquals(1, p1.capacityComponents()); - assertEquals(2, p1.capacityChars()); p1 = new CategoryPath(p,0); assertEquals(0, p1.length()); assertEquals("", p1.toString('/')); - assertEquals(0, p1.capacityComponents()); - assertEquals(0, p1.capacityChars()); // with all the following lengths, the prefix should be the whole path: int[] lengths = { 3, -1, 4 }; @@ -390,139 +170,31 @@ assertEquals(3, p1.length()); assertEquals("hi/there/man", p1.toString('/')); assertEquals(p, p1); - assertEquals(3, p1.capacityComponents()); - assertEquals(10, p1.capacityChars()); } } @Test public void testEquals() { - // check that two empty paths are equal, even if they have different - // capacities: - CategoryPath p1 = new CategoryPath(0,0); - CategoryPath p2 = new CategoryPath(1000,300); - assertEquals(true, p1.equals(p2)); - // If we make p2 different, it is no longer equals: - p2.add("hi"); - assertEquals(false, p1.equals(p2)); - // A categoryPath is definitely not equals to an object of some other - // type: - assertEquals(false, p1.equals(Integer.valueOf(3))); - // Build two paths separately, and compare them - p1.clear(); - p1.add("hello"); - p1.add("world"); - p2.clear(); - p2.add("hello"); - p2.add("world"); - assertEquals(true, p1.equals(p2)); - // Check that comparison really don't look at old data which might - // be stored in the array - p1.clear(); - p1.add("averylongcategoryname"); - p1.clear(); - p1.add("hi"); - p2.clear(); - p2.add("hi"); - assertEquals(true, p1.equals(p2)); - // Being of the same length is obviously not enough to be equal - p1.clear(); - p1.add("hi"); - p2.clear(); - p2.add("hello"); - assertEquals(false, p1.equals(p2)); - p1.clear(); - p1.add("hi"); - p2.clear(); - p2.add("ho"); - assertEquals(false, p1.equals(p2)); + assertEquals(new CategoryPath(), new CategoryPath()); + assertFalse(new CategoryPath().equals(new CategoryPath("hi"))); + assertFalse(new CategoryPath().equals(Integer.valueOf(3))); + assertEquals(new CategoryPath("hello", "world"), new CategoryPath("hello", "world")); } + @Test public void testHashCode() { - // Note: in this test, we assume that if two paths are not equal, - // their hash codes should come out differently. This is *not* - // always the case, but in the examples we use below, it comes out - // fine, and unless we have some really bad luck in changing our - // hash function, this should also remain true in the future. - - // check that two empty paths are equal, even if they have different - // capacities: - CategoryPath p1 = new CategoryPath(0,0); - CategoryPath p2 = new CategoryPath(1000,300); - assertEquals(p1.hashCode(), p2.hashCode()); - // If we make p2 different, it is no longer equals: - p2.add("hi"); - assertEquals(false, p1.hashCode()==p2.hashCode()); - // Build two paths separately, and compare them - p1.clear(); - p1.add("hello"); - p1.add("world"); - p2.clear(); - p2.add("hello"); - p2.add("world"); - assertEquals(p1.hashCode(), p2.hashCode()); - // Check that comparison really don't look at old data which might - // be stored in the array - p1.clear(); - p1.add("averylongcategoryname"); - p1.clear(); - p1.add("hi"); - p2.clear(); - p2.add("hi"); - assertEquals(p1.hashCode(), p2.hashCode()); - // Being of the same length is obviously not enough to be equal - p1.clear(); - p1.add("hi"); - p2.clear(); - p2.add("hello"); - assertEquals(false, p1.hashCode()==p2.hashCode()); - p1.clear(); - p1.add("hi"); - p2.clear(); - p2.add("ho"); - assertEquals(false, p1.hashCode()==p2.hashCode()); + assertEquals(new CategoryPath().hashCode(), new CategoryPath().hashCode()); + assertFalse(new CategoryPath().hashCode() == new CategoryPath("hi").hashCode()); + assertEquals(new CategoryPath("hello", "world").hashCode(), new CategoryPath("hello", "world").hashCode()); } @Test public void testHashCodePrefix() { - // First, repeat the tests of testHashCode() using hashCode(-1) - // just to make sure nothing was broken in this variant: - CategoryPath p1 = new CategoryPath(0,0); - CategoryPath p2 = new CategoryPath(1000,300); - assertEquals(p1.hashCode(-1), p2.hashCode(-1)); - p2.add("hi"); - assertEquals(false, p1.hashCode(-1)==p2.hashCode(-1)); - p1.clear(); - p1.add("hello"); - p1.add("world"); - p2.clear(); - p2.add("hello"); - p2.add("world"); - assertEquals(p1.hashCode(-1), p2.hashCode(-1)); - p1.clear(); - p1.add("averylongcategoryname"); - p1.clear(); - p1.add("hi"); - p2.clear(); - p2.add("hi"); - assertEquals(p1.hashCode(-1), p2.hashCode(-1)); - p1.clear(); - p1.add("hi"); - p2.clear(); - p2.add("hello"); - assertEquals(false, p1.hashCode(-1)==p2.hashCode(-1)); - p1.clear(); - p1.add("hi"); - p2.clear(); - p2.add("ho"); - assertEquals(false, p1.hashCode(-1)==p2.hashCode(-1)); + assertEquals(new CategoryPath().hashCode(-1), new CategoryPath().hashCode(-1)); + assertFalse(new CategoryPath().hashCode(-1) == new CategoryPath("hi").hashCode(-1)); + assertEquals(new CategoryPath("hello", "world").hashCode(-1), new CategoryPath("hello", "world").hashCode(-1)); - // Now move to testing prefixes: - CategoryPath p = new CategoryPath(); - p.add("this"); - p.add("is"); - p.add("a"); - p.add("test"); + CategoryPath p = new CategoryPath("this", "is", "a", "test"); assertEquals(p.hashCode(), p.hashCode(4)); assertEquals(new CategoryPath().hashCode(), p.hashCode(0)); assertEquals(new CategoryPath(p, 1).hashCode(), p.hashCode(1)); @@ -532,98 +204,18 @@ @Test public void testLongHashCode() { - // Note: in this test, we assume that if two paths are not equal, - // their hash codes should come out differently. This is *not* - // always the case, but in the examples we use below, it comes out - // fine, and unless we have some really bad luck in changing our - // hash function, this should also remain true in the future. - - // check that two empty paths are equal, even if they have different - // capacities: - CategoryPath p1 = new CategoryPath(0,0); - CategoryPath p2 = new CategoryPath(1000,300); - assertEquals(p1.longHashCode(), p2.longHashCode()); - // If we make p2 different, it is no longer equals: - p2.add("hi"); - assertEquals(false, p1.longHashCode()==p2.longHashCode()); - // Build two paths separately, and compare them - p1.clear(); - p1.add("hello"); - p1.add("world"); - p2.clear(); - p2.add("hello"); - p2.add("world"); - assertEquals(p1.longHashCode(), p2.longHashCode()); - // Check that comparison really don't look at old data which might - // be stored in the array - p1.clear(); - p1.add("averylongcategoryname"); - p1.clear(); - p1.add("hi"); - p2.clear(); - p2.add("hi"); - assertEquals(p1.longHashCode(), p2.longHashCode()); - // Being of the same length is obviously not enough to be equal - p1.clear(); - p1.add("hi"); - p2.clear(); - p2.add("hello"); - assertEquals(false, p1.longHashCode()==p2.longHashCode()); - p1.clear(); - p1.add("hi"); - p2.clear(); - p2.add("ho"); - assertEquals(false, p1.longHashCode()==p2.longHashCode()); + assertEquals(new CategoryPath().longHashCode(), new CategoryPath().longHashCode()); + assertFalse(new CategoryPath().longHashCode() == new CategoryPath("hi").longHashCode()); + assertEquals(new CategoryPath("hello", "world").longHashCode(), new CategoryPath("hello", "world").longHashCode()); } @Test public void testLongHashCodePrefix() { - // First, repeat the tests of testLongHashCode() using longHashCode(-1) - // just to make sure nothing was broken in this variant: + assertEquals(new CategoryPath().longHashCode(-1), new CategoryPath().longHashCode(-1)); + assertFalse(new CategoryPath().longHashCode(-1) == new CategoryPath("hi").longHashCode(-1)); + assertEquals(new CategoryPath("hello", "world").longHashCode(-1), new CategoryPath("hello", "world").longHashCode(-1)); - // check that two empty paths are equal, even if they have different - // capacities: - CategoryPath p1 = new CategoryPath(0,0); - CategoryPath p2 = new CategoryPath(1000,300); - assertEquals(p1.longHashCode(-1), p2.longHashCode(-1)); - // If we make p2 different, it is no longer equals: - p2.add("hi"); - assertEquals(false, p1.longHashCode(-1)==p2.longHashCode(-1)); - // Build two paths separately, and compare them - p1.clear(); - p1.add("hello"); - p1.add("world"); - p2.clear(); - p2.add("hello"); - p2.add("world"); - assertEquals(p1.longHashCode(-1), p2.longHashCode(-1)); - // Check that comparison really don't look at old data which might - // be stored in the array - p1.clear(); - p1.add("averylongcategoryname"); - p1.clear(); - p1.add("hi"); - p2.clear(); - p2.add("hi"); - assertEquals(p1.longHashCode(-1), p2.longHashCode(-1)); - // Being of the same length is obviously not enough to be equal - p1.clear(); - p1.add("hi"); - p2.clear(); - p2.add("hello"); - assertEquals(false, p1.longHashCode(-1)==p2.longHashCode(-1)); - p1.clear(); - p1.add("hi"); - p2.clear(); - p2.add("ho"); - assertEquals(false, p1.longHashCode(-1)==p2.longHashCode(-1)); - - // Now move to testing prefixes: - CategoryPath p = new CategoryPath(); - p.add("this"); - p.add("is"); - p.add("a"); - p.add("test"); + CategoryPath p = new CategoryPath("this", "is", "a", "test"); assertEquals(p.longHashCode(), p.longHashCode(4)); assertEquals(new CategoryPath().longHashCode(), p.longHashCode(0)); assertEquals(new CategoryPath(p, 1).longHashCode(), p.longHashCode(1)); @@ -635,14 +227,10 @@ public void testArrayConstructor() { CategoryPath p = new CategoryPath("hello", "world", "yo"); assertEquals(3, p.length()); - assertEquals(12, p.capacityChars()); - assertEquals(3, p.capacityComponents()); assertEquals("hello/world/yo", p.toString('/')); p = new CategoryPath(new String[0]); assertEquals(0, p.length()); - assertEquals(0, p.capacityChars()); - assertEquals(0, p.capacityComponents()); } @Test @@ -651,10 +239,10 @@ CategoryPath p = new CategoryPath(); assertEquals(0, p.charsNeededForFullPath()); int expectedCharsNeeded = 0; - for (int i=0; i0) { + if (i > 0) { expectedCharsNeeded++; } assertEquals(expectedCharsNeeded, p.charsNeededForFullPath()); @@ -724,7 +312,7 @@ // 2^15-1 character limit that CategoryPath allows: sb = new StringBuilder(); CategoryPath p = new CategoryPath(); - for (int i=0; i<1000; i++) { + for (int i = 0; i < 1000; i++) { p.add(Integer.toString(i)); } p.serializeAppendTo(sb); @@ -785,86 +373,6 @@ } @Test - public void testStreamWriterSerialization() throws Exception { - CategoryPath[] testPaths = { - new CategoryPath("hi", "there", "man"), - new CategoryPath("hello"), - new CategoryPath("date", "2009", "May", "13", "14", "59", "00"), - // See that an empty category, which generates a (char)0, - // doesn't cause any problems in the middle of the serialization: - new CategoryPath(), - new CategoryPath("another", "example") - }; - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - OutputStreamWriter osw = new OutputStreamWriter(baos, "UTF-8"); // UTF-8 is always supported. - for (CategoryPath cp : testPaths) { - cp.serializeToStreamWriter(osw); - } - osw.flush(); - ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray()); - InputStreamReader isr = new InputStreamReader(bais, "UTF-8"); - CategoryPath[] checkPaths = { - new CategoryPath(), new CategoryPath(), new CategoryPath(), new CategoryPath(), new CategoryPath() - }; - for (int j = 0; j < checkPaths.length; j++) { - checkPaths[j].deserializeFromStreamReader(isr); - assertEquals("Paths not equal", testPaths[j], checkPaths[j]); - } - } - - @Test - public void testCharSequenceCtor() throws Exception { - CategoryPath[] testPaths = { - new CategoryPath(new CS("hi"), new CS("there"), new CS("man")), - new CategoryPath(new CS("hello")), - new CategoryPath(new CS("date"), new CS("2009"), new CS("May"), new CS("13"), - new CS("14"), new CS("59"), new CS("00")), - new CategoryPath(), - new CategoryPath(new CS("another"), new CS("example")) - }; - assertEquals("Wrong capacity", 10, testPaths[0].capacityChars()); - assertEquals("Wrong capacity", 5, testPaths[1].capacityChars()); - assertEquals("Wrong capacity", 19, testPaths[2].capacityChars()); - assertEquals("Wrong capacity", 0, testPaths[3].capacityChars()); - assertEquals("Wrong capacity", 14, testPaths[4].capacityChars()); - - assertEquals("Wrong component", "hi", testPaths[0].getComponent(0)); - assertEquals("Wrong component", "there", testPaths[0].getComponent(1)); - assertEquals("Wrong component", "man", testPaths[0].getComponent(2)); - assertEquals("Wrong component", "hello", testPaths[1].getComponent(0)); - assertEquals("Wrong component", "date", testPaths[2].getComponent(0)); - assertEquals("Wrong component", "2009", testPaths[2].getComponent(1)); - assertEquals("Wrong component", "May", testPaths[2].getComponent(2)); - assertEquals("Wrong component", "13", testPaths[2].getComponent(3)); - assertEquals("Wrong component", "14", testPaths[2].getComponent(4)); - assertEquals("Wrong component", "59", testPaths[2].getComponent(5)); - assertEquals("Wrong component", "00", testPaths[2].getComponent(6)); - assertNull("Not null component", testPaths[3].getComponent(0)); - assertEquals("Wrong component", "another", testPaths[4].getComponent(0)); - assertEquals("Wrong component", "example", testPaths[4].getComponent(1)); - } - - @Test - public void testIsDescendantOf() throws Exception { - CategoryPath[] testPaths = { - new CategoryPath(new CS("hi"), new CS("there")), - new CategoryPath(new CS("hi"), new CS("there"), new CS("man")), - new CategoryPath(new CS("hithere"), new CS("man")), - new CategoryPath(new CS("hi"), new CS("there"), new CS("mano")), - new CategoryPath(), - }; - assertTrue(testPaths[0].isDescendantOf(testPaths[0])); - assertTrue(testPaths[0].isDescendantOf(testPaths[4])); - assertFalse(testPaths[4].isDescendantOf(testPaths[0])); - assertTrue(testPaths[1].isDescendantOf(testPaths[0])); - assertTrue(testPaths[1].isDescendantOf(testPaths[1])); - assertTrue(testPaths[3].isDescendantOf(testPaths[0])); - assertFalse(testPaths[2].isDescendantOf(testPaths[0])); - assertFalse(testPaths[2].isDescendantOf(testPaths[1])); - assertFalse(testPaths[3].isDescendantOf(testPaths[1])); - } - - @Test public void testCompareTo() { CategoryPath p = new CategoryPath("a/b/c/d", '/'); CategoryPath pother = new CategoryPath("a/b/c/d", '/'); @@ -880,25 +388,5 @@ pother = new CategoryPath("a/b/c//e", '/'); assertTrue(pother.compareTo(p) < 0); } - - private static class CS implements CharSequence { - public CS(String s) { - this.ca = new char[s.length()]; - s.getChars(0, s.length(), this.ca, 0); - } - @Override - public char charAt(int index) { - return this.ca[index]; - } - @Override - public int length() { - return this.ca.length; - } - @Override - public CharSequence subSequence(int start, int end) { - return null; // not used. - } - private char[] ca; - } } Index: lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestAddTaxonomy.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestAddTaxonomy.java (revision 1428749) +++ lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestAddTaxonomy.java (working copy) @@ -81,7 +81,6 @@ } private void validate(Directory dest, Directory src, OrdinalMap ordMap) throws Exception { - CategoryPath cp = new CategoryPath(); DirectoryTaxonomyReader destTR = new DirectoryTaxonomyReader(dest); try { final int destSize = destTR.getSize(); @@ -98,7 +97,7 @@ // validate that all source categories exist in destination, and their // ordinals are as expected. for (int j = 1; j < srcSize; j++) { - srcTR.getPath(j, cp); + CategoryPath cp = srcTR.getPath(j); int destOrdinal = destTR.getOrdinal(cp); assertTrue(cp + " not found in destination", destOrdinal > 0); assertEquals(destOrdinal, map[j]); Index: lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyReader.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyReader.java (revision 1428749) +++ lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyReader.java (working copy) @@ -155,7 +155,7 @@ int k = random.nextInt(n); tw = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE); for (int j=0; j<=k; j++) { - tw.addCategory(new CategoryPath(cp[j])); + tw.addCategory(cp[j].clone()); } tw.close(); if (closeReader) { Index: lucene/facet/src/test/org/apache/lucene/facet/taxonomy/writercache/cl2o/TestCompactLabelToOrdinal.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/taxonomy/writercache/cl2o/TestCompactLabelToOrdinal.java (revision 1428749) +++ lucene/facet/src/test/org/apache/lucene/facet/taxonomy/writercache/cl2o/TestCompactLabelToOrdinal.java (working copy) @@ -108,7 +108,7 @@ @Override public void addLabel(CategoryPath label, int ordinal) { - map.put(new CategoryPath(label), ordinal); + map.put(label.clone(), ordinal); } @Override