Index: lucene/CHANGES.txt
===================================================================
--- lucene/CHANGES.txt	(revision 1411159)
+++ lucene/CHANGES.txt	(working copy)
@@ -34,6 +34,26 @@
   Override lengthNorm and/or encode/decodeNormValue to change the specifics,
   like Lucene 3.x. (Robert Muir)
 
+* LUCENE-3441: The facet module now supports NRT. As a result, the following
+  changes were made:
+  - DirectoryTaxonomyReader has a new constructor which takes a
+    DirectoryTaxonomyWriter. You should use that constructor in order to get
+    NRT support (or the old one for non-NRT).
+  - TaxonomyReader.refresh() was removed in favor of the static method
+    TaxonomyReader.openIfChanged. Similar to DirectoryReader, the method
+    either returns null if no changes were made to the taxonomy, or a new
+    TaxonomyReader instance otherwise. Instead of calling refresh(), write
+    code similar to how you reopen a regular DirectoryReader.
+  - TaxonomyReader.openIfChanged (previously refresh()) no longer throws
+    InconsistentTaxonomyException, and supports recreate.
+    InconsistentTaxonomyException was removed.
+  - ChildrenArrays was pulled out of TaxonomyReader into a top-level class.
+  - TaxonomyReader was made an abstract class (instead of an interface), with
+    methods such as close() and reference counting management pulled from
+    DirectoryTaxonomyReader, and made final. The rest of the methods remained
+    abstract.
+  (Shai Erera)
+
 New Features
 
 * LUCENE-4226: New experimental StoredFieldsFormat that compresses chunks of

Index: lucene/facet/src/java/org/apache/lucene/facet/search/TopKFacetResultsHandler.java
===================================================================
--- lucene/facet/src/java/org/apache/lucene/facet/search/TopKFacetResultsHandler.java	(revision 1411159)
+++ lucene/facet/src/java/org/apache/lucene/facet/search/TopKFacetResultsHandler.java	(working copy)
@@ -8,8 +8,8 @@
 import org.apache.lucene.facet.search.results.FacetResultNode;
 import org.apache.lucene.facet.search.results.MutableFacetResultNode;
 import org.apache.lucene.facet.search.results.IntermediateFacetResult;
+import org.apache.lucene.facet.taxonomy.ChildrenArrays;
 import org.apache.lucene.facet.taxonomy.TaxonomyReader;
-import org.apache.lucene.facet.taxonomy.TaxonomyReader.ChildrenArrays;
 import org.apache.lucene.facet.util.ResultSortUtils;
 
 /*
@@ -120,7 +120,7 @@
    * @return total number of descendants considered here by pq, excluding ordinal itself.
*/ private int heapDescendants(int ordinal, Heap pq, - MutableFacetResultNode parentResultNode, FacetArrays facetArrays, int offset) { + MutableFacetResultNode parentResultNode, FacetArrays facetArrays, int offset) throws IOException { int partitionSize = facetArrays.getArraysLength(); int endOffset = offset + partitionSize; ChildrenArrays childrenArray = taxonomyReader.getChildrenArrays(); Index: lucene/facet/src/java/org/apache/lucene/facet/search/TopKInEachNodeHandler.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/TopKInEachNodeHandler.java (revision 1411159) +++ lucene/facet/src/java/org/apache/lucene/facet/search/TopKInEachNodeHandler.java (working copy) @@ -12,8 +12,8 @@ import org.apache.lucene.facet.search.results.FacetResultNode; import org.apache.lucene.facet.search.results.MutableFacetResultNode; import org.apache.lucene.facet.search.results.IntermediateFacetResult; +import org.apache.lucene.facet.taxonomy.ChildrenArrays; import org.apache.lucene.facet.taxonomy.TaxonomyReader; -import org.apache.lucene.facet.taxonomy.TaxonomyReader.ChildrenArrays; import org.apache.lucene.util.collections.IntIterator; import org.apache.lucene.util.collections.IntToObjectMap; Index: lucene/facet/src/java/org/apache/lucene/facet/taxonomy/ChildrenArrays.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/taxonomy/ChildrenArrays.java (revision 0) +++ lucene/facet/src/java/org/apache/lucene/facet/taxonomy/ChildrenArrays.java (working copy) @@ -0,0 +1,88 @@ +package org.apache.lucene.facet.taxonomy; + + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Equivalent representations of the taxonomy's parent info, + * used internally for efficient computation of facet results: + * "youngest child" and "oldest sibling" + */ +public class ChildrenArrays { + + private final int[] youngestChild, olderSibling; + + public ChildrenArrays(int[] parents) { + this(parents, null); + } + + public ChildrenArrays(int[] parents, ChildrenArrays copyFrom) { + youngestChild = new int[parents.length]; + olderSibling = new int[parents.length]; + int first = 0; + if (copyFrom != null) { + System.arraycopy(copyFrom.getYoungestChildArray(), 0, youngestChild, 0, copyFrom.getYoungestChildArray().length); + System.arraycopy(copyFrom.getOlderSiblingArray(), 0, olderSibling, 0, copyFrom.getOlderSiblingArray().length); + first = copyFrom.getOlderSiblingArray().length; + } + computeArrays(parents, first); + } + + private void computeArrays(int[] parents, int first) { + // reset the youngest child of all ordinals. 
while this should be done only + // for the leaves, we don't know up front which are the leaves, so we reset + // all of them. + for (int i = first; i < parents.length; i++) { + youngestChild[i] = TaxonomyReader.INVALID_ORDINAL; + } + + // the root category has no parent, and therefore no siblings + if (first == 0) { + first = 1; + olderSibling[0] = TaxonomyReader.INVALID_ORDINAL; + } + + for (int i = first; i < parents.length; i++) { + // note that parents[i] is always < i, so the right-hand-side of + // the following line is already set when we get here + olderSibling[i] = youngestChild[parents[i]]; + youngestChild[parents[i]] = i; + } + } + + /** + * Returns an {@code int[]} the size of the taxonomy listing for each category + * the ordinal of its immediate older sibling (the sibling in the taxonomy + * tree with the highest ordinal below that of the given ordinal). The value + * for a category with no older sibling is {@link TaxonomyReader#INVALID_ORDINAL}. + */ + public int[] getOlderSiblingArray() { + return olderSibling; + } + + /** + * Returns an {@code int[]} the size of the taxonomy listing the ordinal of + * the youngest (highest numbered) child category of each category in the + * taxonomy. The value for a leaf category (a category without children) is + * {@link TaxonomyReader#INVALID_ORDINAL}. + */ + public int[] getYoungestChildArray() { + return youngestChild; + } + +} Property changes on: lucene/facet/src/java/org/apache/lucene/facet/taxonomy/ChildrenArrays.java ___________________________________________________________________ Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/facet/src/java/org/apache/lucene/facet/taxonomy/InconsistentTaxonomyException.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/taxonomy/InconsistentTaxonomyException.java (revision 1411159) +++ lucene/facet/src/java/org/apache/lucene/facet/taxonomy/InconsistentTaxonomyException.java (working copy) @@ -1,40 +0,0 @@ -package org.apache.lucene.facet.taxonomy; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * Exception indicating that a certain operation could not be performed - * on a taxonomy related object because of an inconsistency. - *

- * For example, trying to refresh a taxonomy reader might fail in case - * the underlying taxonomy was meanwhile modified in a manner which - * does not allow to perform such a refresh. (See {@link TaxonomyReader#refresh()}.) - * - * @lucene.experimental - */ -public class InconsistentTaxonomyException extends Exception { - - public InconsistentTaxonomyException(String message) { - super(message); - } - - public InconsistentTaxonomyException() { - super(); - } - -} Index: lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyReader.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyReader.java (revision 1411159) +++ lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyReader.java (working copy) @@ -3,7 +3,10 @@ import java.io.Closeable; import java.io.IOException; import java.util.Map; +import java.util.concurrent.atomic.AtomicInteger; +import org.apache.lucene.store.AlreadyClosedException; + /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -60,13 +63,13 @@ * * @lucene.experimental */ -public interface TaxonomyReader extends Closeable { +public abstract class TaxonomyReader implements Closeable { /** - * The root category (the category with the empty path) always has the - * ordinal 0, to which we give a name ROOT_ORDINAL. - * getOrdinal() of an empty path will always return ROOT_ORDINAL, and - * getCategory(ROOT_ORDINAL) will return the empty path. + * The root category (the category with the empty path) always has the ordinal + * 0, to which we give a name ROOT_ORDINAL. {@link #getOrdinal(CategoryPath)} + * of an empty path will always return {@code ROOT_ORDINAL}, and + * {@link #getPath(int)} with {@code ROOT_ORDINAL} will return the empty path. */ public final static int ROOT_ORDINAL = 0; @@ -77,207 +80,195 @@ public final static int INVALID_ORDINAL = -1; /** - * getOrdinal() returns the ordinal of the category given as a path. - * The ordinal is the category's serial number, an integer which starts - * with 0 and grows as more categories are added (note that once a category - * is added, it can never be deleted). - *

- * If the given category wasn't found in the taxonomy, INVALID_ORDINAL is - * returned. + * If the taxonomy has changed since the provided reader was + * opened, open and return a new {@link TaxonomyReader}; else, return + * {@code null}. The new reader, if not {@code null}, will be the same + * type of reader as the one given to this method. + * + *

+ * This method is typically far less costly than opening a + * fully new {@link TaxonomyReader} as it shares + * resources with the provided + * {@link TaxonomyReader}, when possible. */ - public int getOrdinal(CategoryPath categoryPath) throws IOException; - - /** - * getPath() returns the path name of the category with the given - * ordinal. The path is returned as a new CategoryPath object - to - * reuse an existing object, use {@link #getPath(int, CategoryPath)}. - *

- * A null is returned if a category with the given ordinal does not exist. - */ - public CategoryPath getPath(int ordinal) throws IOException; + public static TaxonomyReader openIfChanged(TaxonomyReader oldTaxoReader) throws IOException { + final TaxonomyReader newTaxoReader = oldTaxoReader.doOpenIfChanged(); + assert newTaxoReader != oldTaxoReader; + return newTaxoReader; + } - /** - * getPath() returns the path name of the category with the given - * ordinal. The path is written to the given CategoryPath object (which - * is cleared first). - *

- * If a category with the given ordinal does not exist, the given - * CategoryPath object is not modified, and the method returns - * false. Otherwise, the method returns true. - */ - public boolean getPath(int ordinal, CategoryPath result) throws IOException; + private volatile boolean closed = false; + // set refCount to 1 at start + private final AtomicInteger refCount = new AtomicInteger(1); + /** - * refresh() re-reads the taxonomy information if there were any changes to - * the taxonomy since this instance was opened or last refreshed. Calling - * refresh() is more efficient than close()ing the old instance and opening a - * new one. - *

- * If there were no changes since this instance was opened or last refreshed, - * then this call does nothing. Note, however, that this is still a relatively - * slow method (as it needs to verify whether there have been any changes on - * disk to the taxonomy), so it should not be called too often needlessly. In - * faceted search, the taxonomy reader's refresh() should be called only after - * a reopen() of the main index. - *

- * Refreshing the taxonomy might fail in some cases, for example - * if the taxonomy was recreated since this instance was opened or last refreshed. - * In this case an {@link InconsistentTaxonomyException} is thrown, - * suggesting that in order to obtain up-to-date taxonomy data a new - * {@link TaxonomyReader} should be opened. Note: This {@link TaxonomyReader} - * instance remains unchanged and usable in this case, and the application can - * continue to use it, and should still {@link #close()} when no longer needed. - *

- * It should be noted that refresh() is similar in purpose to - * IndexReader.reopen(), but the two methods behave differently. refresh() - * refreshes the existing TaxonomyReader object, rather than opening a new one - * in addition to the old one as reopen() does. The reason is that in a - * taxonomy, one can only add new categories and cannot modify or delete - * existing categories; Therefore, there is no reason to keep an old snapshot - * of the taxonomy open - refreshing the taxonomy to the newest data and using - * this new snapshots in all threads (whether new or old) is fine. This saves - * us needing to keep multiple copies of the taxonomy open in memory. - * @return true if anything has changed, false otherwise. + * performs the actual task of closing the resources that are used by the + * taxonomy reader. */ - public boolean refresh() throws IOException, InconsistentTaxonomyException; + protected abstract void doClose() throws IOException; /** - * getParent() returns the ordinal of the parent category of the category - * with the given ordinal. - *

- * When a category is specified as a path name, finding the path of its - * parent is as trivial as dropping the last component of the path. - * getParent() is functionally equivalent to calling getPath() on the - * given ordinal, dropping the last component of the path, and then calling - * getOrdinal() to get an ordinal back. However, implementations are - * expected to provide a much more efficient implementation: - *

- * getParent() should be a very quick method, as it is used during the - * facet aggregation process in faceted search. Implementations will most - * likely want to serve replies to this method from a pre-filled cache. - *

- * If the given ordinal is the ROOT_ORDINAL, an INVALID_ORDINAL is returned. - * If the given ordinal is a top-level category, the ROOT_ORDINAL is returned. - * If an invalid ordinal is given (negative or beyond the last available - * ordinal), an ArrayIndexOutOfBoundsException is thrown. However, it is - * expected that getParent will only be called for ordinals which are - * already known to be in the taxonomy. + * If the taxonomy has changed since the provided reader was + * opened, open and return a new {@link TaxonomyReader}; else, return + * {@code null}. The new reader, if not {@code null}, will be the same + * type of reader as the one given to this method. + * + *

+ * This method is typically far less costly than opening a + * fully new {@link TaxonomyReader} as it shares + * resources with the provided + * {@link TaxonomyReader}, when possible. */ - public int getParent(int ordinal) throws IOException; + protected abstract TaxonomyReader doOpenIfChanged() throws IOException; /** - * getParentArray() returns an int array of size getSize() listing the - * ordinal of the parent category of each category in the taxonomy. - *

- * The caller can hold on to the array it got indefinitely - it is - * guaranteed that no-one else will modify it. The other side of the - * same coin is that the caller must treat the array it got as read-only - * and not modify it, because other callers might have gotten the - * same array too (and getParent() calls might be answered from the - * same array). - *

- * If you use getParentArray() instead of getParent(), remember that - * the array you got is (naturally) not modified after a refresh(), - * so you should always call getParentArray() again after a refresh(). - *

- * This method's function is similar to allocating an array of size - * getSize() and filling it with getParent() calls, but implementations - * are encouraged to implement it much more efficiently, with O(1) - * complexity. This can be done, for example, by the implementation - * already keeping the parents in an array, and just returning this - * array (without any allocation or copying) when requested. + * @throws AlreadyClosedException if this IndexReader is closed */ - public int[] getParentArray() throws IOException; + protected final void ensureOpen() throws AlreadyClosedException { + if (getRefCount() <= 0) { + throw new AlreadyClosedException("this TaxonomyReader is closed"); + } + } + + @Override + public final void close() throws IOException { + if (!closed) { + synchronized (this) { + if (!closed) { + decRef(); + closed = true; + } + } + } + } /** - * Equivalent representations of the taxonomy's parent info, - * used internally for efficient computation of facet results: - * "youngest child" and "oldest sibling" + * Expert: decreases the refCount of this TaxonomyReader instance. If the + * refCount drops to 0 this taxonomy reader is closed. */ - public static interface ChildrenArrays { - /** - * getYoungestChildArray() returns an int array of size getSize() - * listing the ordinal of the youngest (highest numbered) child - * category of each category in the taxonomy. The value for a leaf - * category (a category without children) is - * INVALID_ORDINAL. - */ - public int[] getYoungestChildArray(); - /** - * getOlderSiblingArray() returns an int array of size getSize() - * listing for each category the ordinal of its immediate older - * sibling (the sibling in the taxonomy tree with the highest ordinal - * below that of the given ordinal). The value for a category with no - * older sibling is INVALID_ORDINAL. - */ - public int[] getOlderSiblingArray(); + public final void decRef() throws IOException { + ensureOpen(); + final int rc = refCount.decrementAndGet(); + if (rc == 0) { + boolean success = false; + try { + doClose(); + closed = true; + success = true; + } finally { + if (!success) { + // Put reference back on failure + refCount.incrementAndGet(); + } + } + } else if (rc < 0) { + throw new IllegalStateException("too many decRef calls: refCount is " + rc + " after decrement"); + } } /** - * getChildrenArrays() returns a {@link ChildrenArrays} object which can - * be used together to efficiently enumerate the children of any category. - *

- * The caller can hold on to the object it got indefinitely - it is - * guaranteed that no-one else will modify it. The other side of the - * same coin is that the caller must treat the object which it got (and - * the arrays it contains) as read-only and not modify it, because - * other callers might have gotten the same object too. - *

- * Implementations should have O(getSize()) time for the first call or - * after a refresh(), but O(1) time for further calls. In neither case - * there should be a need to read new data from disk. These guarantees - * are most likely achieved by calculating this object (based on the - * getParentArray()) when first needed, and later (if the taxonomy was not - * refreshed) returning the same object (without any allocation or copying) - * when requested. - *

-   * The reason we have one method returning one object, rather than two
-   * methods returning two arrays, is to avoid race conditions in a multi-
-   * threaded application: We want to avoid the possibility of returning one
-   * new array and one old array, as those could not be used together.
+   * Returns a {@link ChildrenArrays} object which can be used to efficiently
+   * enumerate the children of any category.
+   *

+   * The caller can hold on to the object it got indefinitely - it is guaranteed
+   * that no-one else will modify it. The other side of the same coin is that
+   * the caller must treat the object which it got (and the arrays it contains)
+   * as read-only and not modify it, because other callers might have
+   * gotten the same object too.
    */
-  public ChildrenArrays getChildrenArrays();
+  public abstract ChildrenArrays getChildrenArrays() throws IOException;
 
   /**
    * Retrieve user committed data.
+   * 
    * @see TaxonomyWriter#commit(Map)
    */
-  public Map<String, String> getCommitUserData() throws IOException;
-  
+  public abstract Map<String, String> getCommitUserData() throws IOException;
+  
   /**
-   * Expert: increments the refCount of this TaxonomyReader instance.
-   * RefCounts can be used to determine when a taxonomy reader can be closed
-   * safely, i.e. as soon as there are no more references.
-   * Be sure to always call a corresponding decRef(), in a finally clause;
-   * otherwise the reader may never be closed.
+   * Returns the ordinal of the category given as a path. The ordinal is the
+   * category's serial number, an integer which starts with 0 and grows as more
+   * categories are added (note that once a category is added, it can never be
+   * deleted).
+   * 
+   * @return the category's ordinal or {@link #INVALID_ORDINAL} if the category
+   *         wasn't found.
    */
-  public void incRef();
-  
+  public abstract int getOrdinal(CategoryPath categoryPath) throws IOException;
+  
   /**
-   * Expert: decreases the refCount of this TaxonomyReader instance.
-   * If the refCount drops to 0, then pending changes (if any) can be
-   * committed to the taxonomy index and this reader can be closed.
-   * @throws IOException If there is a low-level I/O error.
+   * Returns the ordinal of the parent category of the category with the given
+   * ordinal, according to the following rules:
+   * 
+   * - if the given ordinal is {@link #ROOT_ORDINAL}, {@link #INVALID_ORDINAL}
+   *   is returned
+   * - if the given ordinal denotes a top-level category, {@link #ROOT_ORDINAL}
+   *   is returned
+   * 
+   * @throws ArrayIndexOutOfBoundsException
+   *           if an invalid ordinal is given (negative or beyond the last
+   *           available ordinal)
    */
-  public void decRef() throws IOException;
+  public abstract int getParent(int ordinal) throws IOException;
 
   /**
-   * Expert: returns the current refCount for this taxonomy reader
+   * Returns an {@code int[]} the size of the taxonomy listing the ordinal of
+   * the parent category of each category in the taxonomy.
+   *

+   * The caller can hold on to the array it got indefinitely - it is guaranteed
+   * that no-one else will modify it. The other side of the same coin is that
+   * the caller must treat the array it got as read-only and not modify
+   * it, because other callers might have gotten the same array too (and
+   * getParent() calls might be answered from the same array).
    */
-  public int getRefCount();
+  public abstract int[] getParentArray() throws IOException;
+  
+  /**
+   * Returns the path name of the category with the given ordinal. The path is
+   * returned as a new CategoryPath object - to reuse an existing object, use
+   * {@link #getPath(int, CategoryPath)}.
+   * 
+   * @return a {@link CategoryPath} with the required path, or {@code null} if
+   *         the given ordinal is unknown to the taxonomy.
+   */
+  public abstract CategoryPath getPath(int ordinal) throws IOException;
+  
+  /**
+   * Same as {@link #getPath(int)}, only reuses the given {@link CategoryPath}
+   * instance.
+   */
+  public abstract boolean getPath(int ordinal, CategoryPath result) throws IOException;
+
+  /** Returns the current refCount for this taxonomy reader. */
+  public final int getRefCount() {
+    return refCount.get();
+  }
+  
   /**
-   * getSize() returns the number of categories in the taxonomy.
-   *

- * Because categories are numbered consecutively starting with 0, it - * means the taxonomy contains ordinals 0 through getSize()-1. - *

-   * Note that the number returned by getSize() is often slightly higher
-   * than the number of categories inserted into the taxonomy; This is
-   * because when a category is added to the taxonomy, its ancestors
-   * are also added automatically (including the root, which always get
-   * ordinal 0).
+   * Returns the number of categories in the taxonomy. Note that the number of
+   * categories returned is often slightly higher than the number of categories
+   * inserted into the taxonomy; this is because when a category is added to the
+   * taxonomy, its ancestors are also added automatically (including the root,
+   * which always gets ordinal 0).
    */
-  public int getSize();
+  public abstract int getSize();
+  
+  /**
+   * Expert: increments the refCount of this TaxonomyReader instance. RefCounts
+   * can be used to determine when a taxonomy reader can be closed safely, i.e.
+   * as soon as there are no more references. Be sure to always call a
+   * corresponding decRef(), in a finally clause; otherwise the reader may never
+   * be closed.
+   */
+  public final void incRef() {
+    ensureOpen();
+    refCount.incrementAndGet();
+  }
 
 }

Index: lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/Consts.java
===================================================================
--- lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/Consts.java	(revision 1411159)
+++ lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/Consts.java	(working copy)
@@ -2,6 +2,7 @@
 
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.StoredFieldVisitor;
+import org.apache.lucene.util.BytesRef;
 
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -28,6 +29,7 @@
   static final String FULL = "$full_path$";
   static final String FIELD_PAYLOADS = "$payloads$";
   static final String PAYLOAD_PARENT = "p";
+  static final BytesRef PAYLOAD_PARENT_BYTES_REF = new BytesRef(PAYLOAD_PARENT);
   static final char[] PAYLOAD_PARENT_CHARS = PAYLOAD_PARENT.toCharArray();
 
   /**

Index: lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java
===================================================================
--- lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java	(revision 1411159)
+++ lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java	(working copy)
@@ -4,26 +4,22 @@
 import java.util.Iterator;
 import java.util.Map;
 import java.util.Map.Entry;
-import java.util.concurrent.atomic.AtomicInteger;
-import java.util.concurrent.locks.ReadWriteLock;
-import java.util.concurrent.locks.ReentrantReadWriteLock;
 import java.util.logging.Level;
 import java.util.logging.Logger;
 
 import org.apache.lucene.facet.taxonomy.CategoryPath;
-import org.apache.lucene.facet.taxonomy.InconsistentTaxonomyException;
+import org.apache.lucene.facet.taxonomy.ChildrenArrays;
 import org.apache.lucene.facet.taxonomy.TaxonomyReader;
 import org.apache.lucene.facet.taxonomy.directory.Consts.LoadFullPathOnly;
 import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.DocsEnum;
-import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.MultiFields;
 import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.store.AlreadyClosedException;
 import org.apache.lucene.store.Directory;
-import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
+import
org.apache.lucene.util.IOUtils; import org.apache.lucene.util.collections.LRUHashMap; /* @@ -55,19 +51,13 @@ * * @lucene.experimental */ -public class DirectoryTaxonomyReader implements TaxonomyReader { +public class DirectoryTaxonomyReader extends TaxonomyReader { private static final Logger logger = Logger.getLogger(DirectoryTaxonomyReader.class.getName()); - private DirectoryReader indexReader; + private final IndexWriter indexWriter; + private final DirectoryReader indexReader; - // The following lock is used to allow multiple threads to read from the - // index concurrently, while having them block during the very short - // critical moment of refresh() (see comments below). Note, however, that - // we only read from the index when we don't have the entry in our cache, - // and the caches are locked separately. - private ReadWriteLock indexReaderLock = new ReentrantReadWriteLock(); - // The following are the limited-size LRU caches used to cache the latest // results from getOrdinal() and getLabel(). // Because LRUHashMap is not thread-safe, we need to synchronize on this @@ -85,62 +75,98 @@ private final LRUHashMap ordinalCache; private final LRUHashMap categoryCache; - // getParent() needs to be extremely efficient, to the point that we need - // to fetch all the data in advance into memory, and answer these calls - // from memory. Currently we use a large integer array, which is - // initialized when the taxonomy is opened, and potentially enlarged - // when it is refresh()ed. - // These arrays are not syncrhonized. Rather, the reference to the array - // is volatile, and the only writing operation (refreshPrefetchArrays) - // simply creates a new array and replaces the reference. The volatility - // of the reference ensures the correct atomic replacement and its - // visibility properties (the content of the array is visible when the - // new reference is visible). - private ParentArray parentArray; + // TODO: consolidate these objects into one ParentInfo or something? + private volatile ParentArray parentArray; + private volatile ChildrenArrays childrenArrays; private char delimiter = Consts.DEFAULT_DELIMITER; - private volatile boolean closed = false; + /** Called only from {@link #doOpenIfChanged()}. */ + DirectoryTaxonomyReader(DirectoryReader indexReader, IndexWriter indexWriter, + LRUHashMap ordinalCache, + LRUHashMap categoryCache, ParentArray parentArray, + ChildrenArrays childrenArrays) throws IOException { + this.indexReader = indexReader; + this.indexWriter = indexWriter; + // nocommit: copying the caches can be expensive in NRT. Perhaps we can + // reuse the old cache alongside a new one, or just reuse the old cache? + // if the old TR instance is about to die anyway, reusing the cache instance is ok + // we may need to maintain a ref count for it or something, so that it's not clear()'ed + // upon close of the old instance. + // ALSO, when the code uses clone(), it's extremely costly? I.e. tests like + // TestDTR.testOpenIfChangedManySegments take forever to complete !? 
+    synchronized (ordinalCache) {
+      this.ordinalCache = new LRUHashMap<String, Integer>(ordinalCache.getMaxSize());
+      this.ordinalCache.putAll(ordinalCache);
+//      this.ordinalCache = ordinalCache.clone();
+    }
+    synchronized (categoryCache) {
+      this.categoryCache = new LRUHashMap<Integer, String>(categoryCache.getMaxSize());
+      this.categoryCache.putAll(categoryCache);
+//      this.categoryCache = categoryCache.clone();
+    }
+    
+    this.parentArray = parentArray;
+    this.childrenArrays = childrenArrays;
+    
+    // Remove any INVALID_ORDINAL values from the ordinal cache,
+    // because it is possible those are now answered by the new data!
+    Iterator<Entry<String, Integer>> i = this.ordinalCache.entrySet().iterator();
+    while (i.hasNext()) {
+      Entry<String, Integer> e = i.next();
+      if (e.getValue().intValue() == INVALID_ORDINAL) {
+        i.remove();
+      }
+    }
+  }
 
-  // set refCount to 1 at start
-  private final AtomicInteger refCount = new AtomicInteger(1);
-  
   /**
    * Open for reading a taxonomy stored in a given {@link Directory}.
+   * 
    * @param directory
-   *    The {@link Directory} in which to the taxonomy lives. Note that
-   *    the taxonomy is read directly to that directory (not from a
-   *    subdirectory of it).
-   * @throws CorruptIndexException if the Taxonomy is corrupted.
-   * @throws IOException if another error occurred.
+   *          The {@link Directory} in which the taxonomy resides.
+   * @throws CorruptIndexException
+   *           if the Taxonomy is corrupt.
+   * @throws IOException
+   *           if another error occurred.
    */
   public DirectoryTaxonomyReader(Directory directory) throws IOException {
-    this.indexReader = openIndexReader(directory);
+    indexReader = openIndexReader(directory);
+    indexWriter = null;
 
     // These are the default cache sizes; they can be configured after
     // construction with the cache's setMaxSize() method
     ordinalCache = new LRUHashMap<String, Integer>(4000);
     categoryCache = new LRUHashMap<Integer, String>(4000);
-    
-    // TODO (Facet): consider lazily create parent array when asked, not in the constructor
-    parentArray = new ParentArray();
-    parentArray.refresh(indexReader);
   }
-
+  
+  /**
+   * Opens a {@link DirectoryTaxonomyReader} over the given
+   * {@link DirectoryTaxonomyWriter} (for NRT).
+   * 
+   * @param taxoWriter
+   *          The {@link DirectoryTaxonomyWriter} from which to obtain newly
+   *          added categories, in real-time.
+   */
+  public DirectoryTaxonomyReader(DirectoryTaxonomyWriter taxoWriter) throws IOException {
+    indexWriter = taxoWriter.getInternalIndexWriter();
+    indexReader = openIndexReader(indexWriter);
+    
+    // These are the default cache sizes; they can be configured after
+    // construction with the cache's setMaxSize() method
+    ordinalCache = new LRUHashMap<String, Integer>(4000);
+    categoryCache = new LRUHashMap<Integer, String>(4000);
+  }
+  
   protected DirectoryReader openIndexReader(Directory directory) throws IOException {
     return DirectoryReader.open(directory);
   }
+  
+  protected DirectoryReader openIndexReader(IndexWriter writer) throws IOException {
+    return DirectoryReader.open(writer, false);
+  }
 
   /**
-   * @throws AlreadyClosedException if this IndexReader is closed
-   */
-  protected final void ensureOpen() throws AlreadyClosedException {
-    if (getRefCount() <= 0) {
-      throw new AlreadyClosedException("this TaxonomyReader is closed");
-    }
-  }
-  
-  /**
    * setCacheSize controls the maximum allowed size of each of the caches
    * used by {@link #getPath(int)} and {@link #getOrdinal(CategoryPath)}.
    *

@@ -151,10 +177,10 @@ */ public void setCacheSize(int size) { ensureOpen(); - synchronized(categoryCache) { + synchronized (categoryCache) { categoryCache.setMaxSize(size); } - synchronized(ordinalCache) { + synchronized (ordinalCache) { ordinalCache.setMaxSize(size); } } @@ -177,13 +203,13 @@ @Override public int getOrdinal(CategoryPath categoryPath) throws IOException { ensureOpen(); - if (categoryPath.length()==0) { + if (categoryPath.length() == 0) { return ROOT_ORDINAL; } String path = categoryPath.toString(delimiter); // First try to find the answer in the LRU cache: - synchronized(ordinalCache) { + synchronized (ordinalCache) { Integer res = ordinalCache.get(path); if (res!=null) { return res.intValue(); @@ -193,16 +219,9 @@ // If we're still here, we have a cache miss. We need to fetch the // value from disk, and then also put it in the cache: int ret = TaxonomyReader.INVALID_ORDINAL; - try { - indexReaderLock.readLock().lock(); - // TODO (Facet): avoid Multi*? - Bits liveDocs = MultiFields.getLiveDocs(indexReader); - DocsEnum docs = MultiFields.getTermDocsEnum(indexReader, liveDocs, Consts.FULL, new BytesRef(path), 0); - if (docs != null && docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { - ret = docs.docID(); - } - } finally { - indexReaderLock.readLock().unlock(); + DocsEnum docs = MultiFields.getTermDocsEnum(indexReader, null, Consts.FULL, new BytesRef(path), 0); + if (docs != null && docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { + ret = docs.docID(); } // Put the new value in the cache. Note that it is possible that while @@ -211,8 +230,6 @@ // not care about this possibilty, as LRUCache replaces previous values // of the same keys (it doesn't store duplicates). synchronized(ordinalCache) { - // GB: new Integer(int); creates a new object each and every time. - // Integer.valueOf(int) might not (See JavaDoc). ordinalCache.put(path, Integer.valueOf(ret)); } @@ -241,7 +258,7 @@ public boolean getPath(int ordinal, CategoryPath result) throws IOException { ensureOpen(); String label = getLabel(ordinal); - if (label==null) { + if (label == null) { return false; } result.clear(); @@ -251,42 +268,24 @@ private String getLabel(int catID) throws IOException { ensureOpen(); - // First try to find the answer in the LRU cache. It is very - // unfortunate that we need to allocate an Integer object here - - // it would have been better if we used a hash table specifically - // designed for int keys... - // GB: new Integer(int); creates a new object each and every time. - // Integer.valueOf(int) might not (See JavaDoc). + // TODO: can we use an int-based hash impl, such as IntToObjectMap, + // wrapped as LRU? Integer catIDInteger = Integer.valueOf(catID); - synchronized(categoryCache) { + synchronized (categoryCache) { String res = categoryCache.get(catIDInteger); if (res!=null) { return res; } } - // If we're still here, we have a cache miss. We need to fetch the - // value from disk, and then also put it in the cache: - String ret; - try { - indexReaderLock.readLock().lock(); - // The taxonomy API dictates that if we get an invalid category - // ID, we should return null, If we don't check this here, we - // can some sort of an exception from the document() call below. - // NOTE: Currently, we *do not* cache this return value; There - // isn't much point to do so, because checking the validity of - // the docid doesn't require disk access - just comparing with - // the number indexReader.maxDoc(). 
- if (catID<0 || catID>=indexReader.maxDoc()) { - return null; - } - final LoadFullPathOnly loader = new LoadFullPathOnly(); - indexReader.document(catID, loader); - ret = loader.getFullPath(); - } finally { - indexReaderLock.readLock().unlock(); + if (catID < 0 || catID >= indexReader.maxDoc()) { + return null; } + final LoadFullPathOnly loader = new LoadFullPathOnly(); + indexReader.document(catID, loader); + String ret = loader.getFullPath(); + // Put the new value in the cache. Note that it is possible that while // we were doing the above fetching (without the cache locked), some // other thread already added the same category to the cache. We do @@ -299,150 +298,85 @@ return ret; } + // TODO: move to a ParentInfo class? (see TODO for parentArray) @Override - public int getParent(int ordinal) { + public int getParent(int ordinal) throws IOException { ensureOpen(); - // Note how we don't need to hold the read lock to do the following, - // because the array reference is volatile, ensuring the correct - // visibility and ordering: if we get the new reference, the new - // data is also visible to this thread. return getParentArray()[ordinal]; } - /** - * getParentArray() returns an int array of size getSize() listing the - * ordinal of the parent category of each category in the taxonomy. - *

- * The caller can hold on to the array it got indefinitely - it is - * guaranteed that no-one else will modify it. The other side of the - * same coin is that the caller must treat the array it got as read-only - * and not modify it, because other callers might have gotten the - * same array too, and getParent() calls are also answered from the - * same array. - *

- * The getParentArray() call is extremely efficient, merely returning - * a reference to an array that already exists. For a caller that plans - * to call getParent() for many categories, using getParentArray() and - * the array it returns is a somewhat faster approach because it avoids - * the overhead of method calls and volatile dereferencing. - *

- * If you use getParentArray() instead of getParent(), remember that - * the array you got is (naturally) not modified after a refresh(), - * so you should always call getParentArray() again after a refresh(). - */ - @Override - public int[] getParentArray() { + public int[] getParentArray() throws IOException { ensureOpen(); - // Note how we don't need to hold the read lock to do the following, - // because the array reference is volatile, ensuring the correct - // visibility and ordering: if we get the new reference, the new - // data is also visible to this thread. + if (parentArray == null) { + synchronized (this) { + if (parentArray == null) { + parentArray = new ParentArray(indexReader); + } + } + } return parentArray.getArray(); } - // Note that refresh() is synchronized (it is the only synchronized - // method in this class) to ensure that it never gets called concurrently - // with itself. @Override - public synchronized boolean refresh() throws IOException, InconsistentTaxonomyException { + protected DirectoryTaxonomyReader doOpenIfChanged() throws IOException { ensureOpen(); - /* - * Since refresh() can be a lengthy operation, it is very important that we - * avoid locking out all readers for its duration. This is why we don't hold - * the indexReaderLock write lock for the entire duration of this method. In - * fact, it is enough to hold it only during a single assignment! Other - * comments in this method will explain this. - */ - - // note that the lengthy operation indexReader.reopen() does not - // modify the reader, so we can do it without holding a lock. We can - // safely read indexReader without holding the write lock, because - // no other thread can be writing at this time (this method is the - // only possible writer, and it is "synchronized" to avoid this case). - DirectoryReader r2 = DirectoryReader.openIfChanged(indexReader); + + final DirectoryReader r2 = indexWriter == null + ? DirectoryReader.openIfChanged(indexReader) // not NRT + : DirectoryReader.openIfChanged(indexReader, indexWriter, false); // NRT if (r2 == null) { - return false; // no changes, nothing to do + return null; // no changes, nothing to do } - - // validate that a refresh is valid at this point, i.e. that the taxonomy - // was not recreated since this reader was last opened or refresshed. - String t1 = indexReader.getIndexCommit().getUserData().get(DirectoryTaxonomyWriter.INDEX_EPOCH); - String t2 = r2.getIndexCommit().getUserData().get(DirectoryTaxonomyWriter.INDEX_EPOCH); - if (t1 == null) { - if (t2 != null) { - r2.close(); - throw new InconsistentTaxonomyException("Taxonomy was recreated, epoch= " + t2); + + // check if the taxonomy was recreated + boolean success = false; + try { + boolean recreated = false; + String t1 = indexReader.getIndexCommit().getUserData().get(DirectoryTaxonomyWriter.INDEX_EPOCH); + String t2 = r2.getIndexCommit().getUserData().get(DirectoryTaxonomyWriter.INDEX_EPOCH); + if (t1 == null) { + if (t2 != null) { + recreated = true; + } + } else if (!t1.equals(t2)) { + // t1 != null and t2 cannot be null b/c DirTaxoWriter always puts the commit data. + // it's ok to use String.equals because we require the two epoch values to be the same. + recreated = true; } - } else if (!t1.equals(t2)) { - // t1 != null and t2 cannot be null b/c DirTaxoWriter always puts the commit data. - // it's ok to use String.equals because we require the two epoch values to be the same. 
-      r2.close();
-      throw new InconsistentTaxonomyException("Taxonomy was recreated epoch = " + t2 + " != " + t1);
-    }
-    
-    IndexReader oldreader = indexReader;
-    // we can close the old searcher, but need to synchronize this
-    // so that we don't close it in the middle that another routine
-    // is reading from it.
-    indexReaderLock.writeLock().lock();
-    indexReader = r2;
-    indexReaderLock.writeLock().unlock();
-    // We can close the old reader, but need to be certain that we
-    // don't close it while another method is reading from it.
-    // Luckily, we can be certain of that even without putting the
-    // oldreader.close() in the locked section. The reason is that
-    // after lock() succeeded above, we know that all existing readers
-    // had finished (this is what a read-write lock ensures). New
-    // readers, starting after the unlock() we just did, already got
-    // the new indexReader we set above. So nobody can be possibly
-    // using the old indexReader, and we can close it:
-    oldreader.close();
-    
-    // We prefetch some of the arrays to make requests much faster.
-    // Let's refresh these prefetched arrays; This refresh is much
-    // is made more efficient by assuming that it is enough to read
-    // the values for new categories (old categories could not have been
-    // changed or deleted)
-    // Note that this this done without the write lock being held,
-    // which means that it is possible that during a refresh(), a
-    // reader will have some methods (like getOrdinal and getCategory)
-    // return fresh information, while getParent()
-    // (only to be prefetched now) still return older information.
-    // We consider this to be acceptable. The important thing,
-    // however, is that refreshPrefetchArrays() itself writes to
-    // the arrays in a correct manner (see discussion there)
-    parentArray.refresh(indexReader);
-    
-    // Remove any INVALID_ORDINAL values from the ordinal cache,
-    // because it is possible those are now answered by the new data!
-    Iterator<Entry<String, Integer>> i = ordinalCache.entrySet().iterator();
-    while (i.hasNext()) {
-      Entry<String, Integer> e = i.next();
-      if (e.getValue().intValue() == INVALID_ORDINAL) {
-        i.remove();
-      }
-    }
-    
-    return true;
-  }
-
-  @Override
-  public void close() throws IOException {
-    if (!closed) {
-      synchronized (this) {
-        if (!closed) {
-          decRef();
-          closed = true;
+      // These objects are lazily created, so don't create/update them if they weren't created
+      // by this instance.
+      // Note: don't use getParentArray() / getChildrenArrays() because these methods create
+      // the objects.
+      ParentArray newParentArray = null;
+      ChildrenArrays newChildrenArrays = null;
+      if (parentArray != null) {
+        // if the taxonomy was recreated, do not use the data from the current ParentArray
+        newParentArray = recreated ? new ParentArray(r2) : new ParentArray(r2, parentArray);
+        if (childrenArrays != null) {
+          // if the taxonomy was recreated, do not use the data from the current ChildrenArrays
+          newChildrenArrays = recreated
+              ?
new ChildrenArrays(newParentArray.getArray()) + : new ChildrenArrays(newParentArray.getArray(), childrenArrays); } } + DirectoryTaxonomyReader newtr = new DirectoryTaxonomyReader(r2, + indexWriter, ordinalCache, categoryCache, newParentArray, + newChildrenArrays); + success = true; + return newtr; + } finally { + if (!success) { + IOUtils.closeWhileHandlingException(r2); + } } } /** Do the actual closing, free up resources */ - private void doClose() throws IOException { + @Override + protected void doClose() throws IOException { indexReader.close(); - closed = true; - parentArray = null; childrenArrays = null; categoryCache.clear(); @@ -452,12 +386,7 @@ @Override public int getSize() { ensureOpen(); - indexReaderLock.readLock().lock(); - try { - return indexReader.numDocs(); - } finally { - indexReaderLock.readLock().unlock(); - } + return indexReader.numDocs(); } @Override @@ -466,68 +395,23 @@ return indexReader.getIndexCommit().getUserData(); } - private ChildrenArrays childrenArrays; - Object childrenArraysRebuild = new Object(); - @Override - public ChildrenArrays getChildrenArrays() { + public ChildrenArrays getChildrenArrays() throws IOException { ensureOpen(); - // Check if the taxonomy grew since we built the array, and if it - // did, create new (and larger) arrays and fill them as required. - // We do all this under a lock, two prevent to concurrent calls to - // needlessly do the same array building at the same time. - synchronized(childrenArraysRebuild) { - int num = getSize(); - int first; - if (childrenArrays==null) { - first = 0; - } else { - first = childrenArrays.getYoungestChildArray().length; - } - // If the taxonomy hasn't grown, we can return the existing object - // immediately - if (first == num) { - return childrenArrays; - } - // Otherwise, build new arrays for a new ChildrenArray object. - // These arrays start with an enlarged copy of the previous arrays, - // and then are modified to take into account the new categories: - int[] newYoungestChildArray = new int[num]; - int[] newOlderSiblingArray = new int[num]; - // In Java 6, we could just do Arrays.copyOf()... - if (childrenArrays!=null) { - System.arraycopy(childrenArrays.getYoungestChildArray(), 0, - newYoungestChildArray, 0, childrenArrays.getYoungestChildArray().length); - System.arraycopy(childrenArrays.getOlderSiblingArray(), 0, - newOlderSiblingArray, 0, childrenArrays.getOlderSiblingArray().length); - } - int[] parents = getParentArray(); - for (int i=first; i + * NOTE: you should not use the obtained {@link IndexWriter} in any + * way, other than opening an IndexReader on it, or otherwise, the taxonomy + * index may become corrupt! 
+ */ + IndexWriter getInternalIndexWriter() { + return indexWriter; + } + } Index: lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/ParentArray.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/ParentArray.java (revision 1411159) +++ lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/ParentArray.java (working copy) @@ -2,16 +2,14 @@ import java.io.IOException; +import org.apache.lucene.facet.taxonomy.TaxonomyReader; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.MultiFields; +import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.util.Bits; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.facet.taxonomy.TaxonomyReader; - /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -29,55 +27,23 @@ * limitations under the License. */ -// getParent() needs to be extremely efficient, to the point that we need -// to fetch all the data in advance into memory, and answer these calls -// from memory. Currently we use a large integer array, which is -// initialized when the taxonomy is opened, and potentially enlarged -// when it is refresh()ed. /** * @lucene.experimental */ class ParentArray { - // These arrays are not syncrhonized. Rather, the reference to the array - // is volatile, and the only writing operation (refreshPrefetchArrays) - // simply creates a new array and replaces the reference. The volatility - // of the reference ensures the correct atomic replacement and its - // visibility properties (the content of the array is visible when the - // new reference is visible). - private volatile int prefetchParentOrdinal[] = null; + // TODO: maybe use PackedInts? + private final int[] parentOrdinals; - public int[] getArray() { - return prefetchParentOrdinal; + /** Used by {@link #add(int, int)} when the array needs to grow. */ + ParentArray(int[] parentOrdinals) { + this.parentOrdinals = parentOrdinals; } - /** - * refreshPrefetch() refreshes the parent array. Initially, it fills the - * array from the positions of an appropriate posting list. If called during - * a refresh(), when the arrays already exist, only values for new documents - * (those beyond the last one in the array) are read from the positions and - * added to the arrays (that are appropriately enlarged). We assume (and - * this is indeed a correct assumption in our case) that existing categories - * are never modified or deleted. - */ - void refresh(IndexReader indexReader) throws IOException { - // Note that it is not necessary for us to obtain the read lock. - // The reason is that we are only called from refresh() (precluding - // another concurrent writer) or from the constructor (when no method - // could be running). - // The write lock is also not held during the following code, meaning - // that reads *can* happen while this code is running. 
The "volatile" - // property of the prefetchParentOrdinal and prefetchDepth array - // references ensure the correct visibility property of the assignment - // but other than that, we do *not* guarantee that a reader will not - // use an old version of one of these arrays (or both) while a refresh - // is going on. But we find this acceptable - until a refresh has - // finished, the reader should not expect to see new information - // (and the old information is the same in the old and new versions). - int first; - int num = indexReader.maxDoc(); - if (prefetchParentOrdinal==null) { - prefetchParentOrdinal = new int[num]; + public ParentArray(IndexReader reader) throws IOException { + parentOrdinals = new int[reader.maxDoc()]; + if (parentOrdinals.length > 0) { + initFromReader(reader, 0); // Starting Lucene 2.9, following the change LUCENE-1542, we can // no longer reliably read the parent "-1" (see comment in // LuceneTaxonomyWriter.SinglePositionTokenStream). We have no way @@ -85,78 +51,88 @@ // with existing indexes, so what we'll do instead is just // hard-code the parent of ordinal 0 to be -1, and assume (as is // indeed the case) that no other parent can be -1. - if (num>0) { - prefetchParentOrdinal[0] = TaxonomyReader.INVALID_ORDINAL; - } - first = 1; - } else { - first = prefetchParentOrdinal.length; - if (first==num) { - return; // nothing to do - no category was added - } - // In Java 6, we could just do Arrays.copyOf()... - int[] newarray = new int[num]; - System.arraycopy(prefetchParentOrdinal, 0, newarray, 0, - prefetchParentOrdinal.length); - prefetchParentOrdinal = newarray; + parentOrdinals[0] = TaxonomyReader.INVALID_ORDINAL; } + } + + public ParentArray(IndexReader reader, ParentArray copyFrom) throws IOException { + assert copyFrom != null; + int[] copyParents = copyFrom.getArray(); + assert copyParents.length < reader.maxDoc() : "do not init a new ParentArray if the index hasn't changed"; + + this.parentOrdinals = new int[reader.maxDoc()]; + System.arraycopy(copyParents, 0, parentOrdinals, 0, copyParents.length); + initFromReader(reader, copyParents.length); + } - // Read the new part of the parents array from the positions: - // TODO (Facet): avoid Multi*? - Bits liveDocs = MultiFields.getLiveDocs(indexReader); - DocsAndPositionsEnum positions = MultiFields.getTermPositionsEnum(indexReader, liveDocs, - Consts.FIELD_PAYLOADS, new BytesRef(Consts.PAYLOAD_PARENT), - DocsAndPositionsEnum.FLAG_PAYLOADS); - if ((positions == null || positions.advance(first) == DocIdSetIterator.NO_MORE_DOCS) && first < num) { - throw new CorruptIndexException("Missing parent data for category " + first); + // Read the parents of the new categories + private void initFromReader(IndexReader reader, int first) throws IOException { + if (reader.maxDoc() == first) { + return; + } + + TermsEnum termsEnum = null; + DocsAndPositionsEnum positions = null; + int idx = 0; + for (AtomicReaderContext readerCtx : reader.leaves()) { + if (readerCtx.docBase < first) { + continue; } - for (int i=first; i= i (this is an - // invariant kept throughout this loop) - if (positions.docID()==i) { - if (positions.freq() == 0) { // shouldn't happen - throw new CorruptIndexException( - "Missing parent data for category "+i); - } - // TODO (Facet): keep a local (non-volatile) copy of the prefetchParentOrdinal - // reference, because access to volatile reference is slower (?). 
- // Note: The positions we get here are one less than the position - // increment we added originally, so we get here the right numbers: - prefetchParentOrdinal[i] = positions.nextPosition(); + // in general we could call readerCtx.reader().termPositionsEnum(), but that + // passes the liveDocs. Since we know there are no deletions, the code + // below may save some CPU cycles. + termsEnum = readerCtx.reader().fields().terms(Consts.FIELD_PAYLOADS).iterator(termsEnum); + if (!termsEnum.seekExact(Consts.PAYLOAD_PARENT_BYTES_REF, true)) { + throw new CorruptIndexException("Missing parent stream data for segment " + readerCtx.reader()); + } + positions = termsEnum.docsAndPositions(null /* no deletes in taxonomy */, positions); + if (positions == null) { + throw new CorruptIndexException("Missing parent stream data for segment " + readerCtx.reader()); + } - if (positions.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) { - if ( i+1 < num ) { - throw new CorruptIndexException( - "Missing parent data for category "+(i+1)); - } - break; + idx = readerCtx.docBase; + int doc; + while ((doc = positions.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + doc += readerCtx.docBase; + if (doc == idx) { + if (positions.freq() == 0) { // shouldn't happen + throw new CorruptIndexException("Missing parent data for category " + idx); } + + parentOrdinals[idx++] = positions.nextPosition(); } else { // this shouldn't happen - throw new CorruptIndexException( - "Missing parent data for category "+i); + throw new CorruptIndexException("Missing parent data for category " + idx); + } } + if (idx + 1 < readerCtx.reader().maxDoc()) { + throw new CorruptIndexException("Missing parent data for category " + (idx + 1)); + } } + + if (idx != reader.maxDoc()) { + throw new CorruptIndexException("Missing parent data for category " + idx); + } } - + + public int[] getArray() { + return parentOrdinals; + } + /** - * add() is used in LuceneTaxonomyWriter, not in LuceneTaxonomyReader. - * It is only called from a synchronized method, so it is not reentrant, - * and also doesn't need to worry about reads happening at the same time. - * - * NOTE: add() and refresh() CANNOT be used together. If you call add(), - * this changes the arrays and refresh() can no longer be used. + * Used by {@link DirectoryTaxonomyWriter}. If this method returns a non + * {@code null} value, it means that the parents array had to grow, and you + * should use the new instance for further {@link #add(int, int) adds}. */ - void add(int ordinal, int parentOrdinal) { - if (ordinal >= prefetchParentOrdinal.length) { - // grow the array, if necessary. - // In Java 6, we could just do Arrays.copyOf()... 
- int[] newarray = new int[ordinal*2+1]; - System.arraycopy(prefetchParentOrdinal, 0, newarray, 0, - prefetchParentOrdinal.length); - prefetchParentOrdinal = newarray; + ParentArray add(int ordinal, int parentOrdinal) { + if (ordinal >= parentOrdinals.length) { + int[] newarray = new int[ordinal * 2 + 1]; + System.arraycopy(parentOrdinals, 0, newarray, 0, parentOrdinals.length); + newarray[ordinal] = parentOrdinal; + return new ParentArray(newarray); } - prefetchParentOrdinal[ordinal] = parentOrdinal; + parentOrdinals[ordinal] = parentOrdinal; + return null; } } Index: lucene/facet/src/java/org/apache/lucene/util/collections/LRUHashMap.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/util/collections/LRUHashMap.java (revision 1411159) +++ lucene/facet/src/java/org/apache/lucene/util/collections/LRUHashMap.java (working copy) @@ -102,4 +102,10 @@ return size() > maxSize; } + @SuppressWarnings("unchecked") + @Override + public LRUHashMap<K,V> clone() { + return (LRUHashMap<K,V>) super.clone(); + } + } Index: lucene/facet/src/test/org/apache/lucene/facet/search/TestTotalFacetCountsCache.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/search/TestTotalFacetCountsCache.java (revision 1411159) +++ lucene/facet/src/test/org/apache/lucene/facet/search/TestTotalFacetCountsCache.java (working copy) @@ -297,23 +297,17 @@ writers[0].indexWriter.close(); writers[0].taxWriter.close(); - readers[0].taxReader.refresh(); + TaxonomyReader newTaxoReader = TaxonomyReader.openIfChanged(readers[0].taxReader); + assertNotNull(newTaxoReader); + assertTrue("should have received more categories in updated taxonomy", newTaxoReader.getSize() > readers[0].taxReader.getSize()); + readers[0].taxReader.close(); + readers[0].taxReader = newTaxoReader; + DirectoryReader r2 = DirectoryReader.openIfChanged(readers[0].indexReader); assertNotNull(r2); - // Hold on to the 'original' reader so we can do some checks with it - IndexReader origReader = null; - - assertTrue("Reader must be updated!", readers[0].indexReader != r2); - - // Set the 'original' reader - origReader = readers[0].indexReader; - // Set the new master index Reader + readers[0].indexReader.close(); readers[0].indexReader = r2; - // Try to get total-counts the originalReader AGAIN, just for sanity. Should pull from the cache - not recomputed.
- assertTrue("Should be obtained from cache at 6th attempt",totalCounts == - TFC.getTotalCounts(origReader, readers[0].taxReader, iParams, null)); - // now use the new reader - should recompute totalCounts = TFC.getTotalCounts(readers[0].indexReader, readers[0].taxReader, iParams, null); prevGen = assertRecomputed(totalCounts, prevGen, "after updating the index - 7th attempt!"); @@ -322,9 +316,7 @@ assertTrue("Should be obtained from cache at 8th attempt",totalCounts == TFC.getTotalCounts(readers[0].indexReader, readers[0].taxReader, iParams, null)); - origReader.close(); readers[0].close(); - r2.close(); outputFile.delete(); IOUtils.close(dirs[0]); } @@ -380,7 +372,10 @@ writers[0].taxWriter.addCategory(new CategoryPath("foo", Integer.toString(i))); } writers[0].taxWriter.commit(); - readers[0].taxReader.refresh(); + TaxonomyReader newTaxoReader = TaxonomyReader.openIfChanged(readers[0].taxReader); + assertNotNull(newTaxoReader); + readers[0].taxReader.close(); + readers[0].taxReader = newTaxoReader; initCache(); Index: lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyCombined.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyCombined.java (revision 1411159) +++ lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyCombined.java (working copy) @@ -5,18 +5,17 @@ import java.io.StringWriter; import java.util.ArrayList; import java.util.Arrays; +import java.util.concurrent.atomic.AtomicBoolean; +import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader; +import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter; import org.apache.lucene.store.Directory; import org.apache.lucene.store.LockObtainFailedException; import org.apache.lucene.store.RAMDirectory; -import org.junit.Ignore; -import org.junit.Test; - import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.facet.taxonomy.TaxonomyReader.ChildrenArrays; -import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader; -import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter; +import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; import org.apache.lucene.util.SlowRAMDirectory; +import org.junit.Test; /* * Licensed to the Apache Software Foundation (ASF) under one or more @@ -35,6 +34,8 @@ * limitations under the License. 
*/ +// TODO: remove this suppress after we fix the TaxoWriter Codec to a non-default (see todo in DirTW) +@SuppressCodecs("SimpleTextCodec") public class TestTaxonomyCombined extends LuceneTestCase { /** The following categories will be added to the taxonomy by @@ -725,7 +726,10 @@ assertEquals(3, ca.getOlderSiblingArray().length); assertEquals(3, ca.getYoungestChildArray().length); // After the refresh, things change: - tr.refresh(); + TaxonomyReader newtr = TaxonomyReader.openIfChanged(tr); + assertNotNull(newtr); + tr.close(); + tr = newtr; ca = tr.getChildrenArrays(); assertEquals(5, tr.getSize()); assertEquals(5, ca.getOlderSiblingArray().length); @@ -737,14 +741,11 @@ indexDir.close(); } - /** - * Test that getParentArrays is valid when retrieved during refresh - */ + // Test that getParentArrays is valid when retrieved during refresh @Test - @Ignore public void testTaxonomyReaderRefreshRaces() throws Exception { // compute base child arrays - after first chunk, and after the other - Directory indexDirBase = newDirectory(); + Directory indexDirBase = newDirectory(); TaxonomyWriter twBase = new DirectoryTaxonomyWriter(indexDirBase); twBase.addCategory(new CategoryPath("a", "0")); final CategoryPath abPath = new CategoryPath("a", "b"); @@ -757,56 +758,64 @@ final int abOrd = trBase.getOrdinal(abPath); final int abYoungChildBase1 = ca1.getYoungestChildArray()[abOrd]; - for (int i=0; i < 1<<10; i++) { //1024 facets + final int numCategories = atLeast(800); + for (int i = 0; i < numCategories; i++) { twBase.addCategory(new CategoryPath("a", "b", Integer.toString(i))); } - twBase.commit(); + twBase.close(); - trBase.refresh(); + TaxonomyReader newTaxoReader = TaxonomyReader.openIfChanged(trBase); + assertNotNull(newTaxoReader); + trBase.close(); + trBase = newTaxoReader; final ChildrenArrays ca2 = trBase.getChildrenArrays(); final int abYoungChildBase2 = ca2.getYoungestChildArray()[abOrd]; - for (int retry=0; retry<100; retry++) { - assertConsistentYoungestChild(abPath, abOrd, abYoungChildBase1, abYoungChildBase2, retry); + int numRetries = atLeast(50); + for (int retry = 0; retry < numRetries; retry++) { + assertConsistentYoungestChild(abPath, abOrd, abYoungChildBase1, abYoungChildBase2, retry, numCategories); } + + trBase.close(); indexDirBase.close(); } private void assertConsistentYoungestChild(final CategoryPath abPath, - final int abOrd, final int abYoungChildBase1, final int abYoungChildBase2, final int retry) + final int abOrd, final int abYoungChildBase1, final int abYoungChildBase2, final int retry, int numCategories) throws Exception { - SlowRAMDirectory indexDir = new SlowRAMDirectory(-1,null); // no slowness for intialization + SlowRAMDirectory indexDir = new SlowRAMDirectory(-1, null); // no slowness for initialization TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir); tw.addCategory(new CategoryPath("a", "0")); tw.addCategory(abPath); tw.commit(); - final TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir); - for (int i=0; i < 1<<10; i++) { //1024 facets + final DirectoryTaxonomyReader tr = new DirectoryTaxonomyReader(indexDir); + for (int i = 0; i < numCategories; i++) { final CategoryPath cp = new CategoryPath("a", "b", Integer.toString(i)); tw.addCategory(cp); assertEquals("Ordinal of "+cp+" must be invalid until Taxonomy Reader was refreshed", TaxonomyReader.INVALID_ORDINAL, tr.getOrdinal(cp)); } - tw.commit(); + tw.close(); - final boolean[] stop = new boolean[] { false }; + final AtomicBoolean stop = new AtomicBoolean(false); final Throwable[] error = new
Throwable[] { null }; final int retrieval[] = { 0 }; Thread thread = new Thread("Child Arrays Verifier") { @Override public void run() { - setPriority(1+getPriority()); + setPriority(1 + getPriority()); try { - while (!stop[0]) { - int lastOrd = tr.getParentArray().length-1; - assertNotNull("path of last-ord "+lastOrd+" is not found!",tr.getPath(lastOrd)); - assertChildrenArrays(tr.getChildrenArrays(),retry,retrieval[0]++); + while (!stop.get()) { + int lastOrd = tr.getParentArray().length - 1; + assertNotNull("path of last-ord " + lastOrd + " is not found!", tr.getPath(lastOrd)); + assertChildrenArrays(tr.getChildrenArrays(), retry, retrieval[0]++); + sleep(10); // don't starve refresh()'s CPU, which sleeps every 50 bytes for 1 ms } } catch (Throwable e) { error[0] = e; - stop[0] = true; + stop.set(true); } } @@ -822,13 +831,15 @@ thread.start(); indexDir.setSleepMillis(1); // some delay for refresh - tr.refresh(); + TaxonomyReader newTaxoReader = TaxonomyReader.openIfChanged(tr); + if (newTaxoReader != null) { + newTaxoReader.close(); + } - stop[0] = true; + stop.set(true); thread.join(); assertNull("Unexpected exception at retry "+retry+" retrieval "+retrieval[0]+": \n"+stackTraceStr(error[0]), error[0]); - tw.close(); tr.close(); } @@ -885,7 +896,7 @@ // ok } assertEquals(1, tr.getSize()); // still root only... - tr.refresh(); // this is not enough, because tw.commit() hasn't been done yet + assertNull(TaxonomyReader.openIfChanged(tr)); // this is not enough, because tw.commit() hasn't been done yet try { tr.getParent(author); fail("Before commit() and refresh(), getParent for "+author+" should still throw exception"); @@ -901,7 +912,11 @@ // ok } assertEquals(1, tr.getSize()); // still root only... - tr.refresh(); + TaxonomyReader newTaxoReader = TaxonomyReader.openIfChanged(tr); + assertNotNull(newTaxoReader); + tr.close(); + tr = newTaxoReader; + try { assertEquals(TaxonomyReader.ROOT_ORDINAL, tr.getParent(author)); // ok @@ -917,7 +932,10 @@ tw.addCategory(new CategoryPath("Author", "Richard Dawkins")); int dawkins = 2; tw.commit(); - tr.refresh(); + newTaxoReader = TaxonomyReader.openIfChanged(tr); + assertNotNull(newTaxoReader); + tr.close(); + tr = newTaxoReader; assertEquals(author, tr.getParent(dawkins)); assertEquals(TaxonomyReader.ROOT_ORDINAL, tr.getParent(author)); assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getParent(TaxonomyReader.ROOT_ORDINAL)); @@ -943,16 +961,19 @@ // before commit and refresh, no change: assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getOrdinal(author)); assertEquals(1, tr.getSize()); // still root only... - tr.refresh(); // this is not enough, because tw.commit() hasn't been done yet + assertNull(TaxonomyReader.openIfChanged(tr)); // this is not enough, because tw.commit() hasn't been done yet assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getOrdinal(author)); assertEquals(1, tr.getSize()); // still root only... tw.commit(); // still not enough before refresh: assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getOrdinal(author)); assertEquals(1, tr.getSize()); // still root only... - tr.refresh(); // finally + TaxonomyReader newTaxoReader = TaxonomyReader.openIfChanged(tr); + assertNotNull(newTaxoReader); + tr.close(); + tr = newTaxoReader; assertEquals(1, tr.getOrdinal(author)); - assertEquals(2, tr.getSize()); // still root only... + assertEquals(2, tr.getSize()); tw.close(); tr.close(); indexDir.close(); @@ -977,7 +998,7 @@ // Try to open a second writer, with the first one locking the directory.
// We expect to get a LockObtainFailedException. try { - new DirectoryTaxonomyWriter(indexDir); + assertNull(new DirectoryTaxonomyWriter(indexDir)); fail("should have failed to write in locked directory"); } catch (LockObtainFailedException e) { // this is what we expect to happen. @@ -989,7 +1010,10 @@ tw2.addCategory(new CategoryPath("hey")); tw2.close(); // See that the writer indeed wrote: - tr.refresh(); + TaxonomyReader newtr = TaxonomyReader.openIfChanged(tr); + assertNotNull(newtr); + tr.close(); + tr = newtr; assertEquals(3, tr.getOrdinal(new CategoryPath("hey"))); tr.close(); tw.close(); @@ -1086,6 +1110,27 @@ indexDir.close(); } + @Test + public void testNRT() throws Exception { + Directory dir = newDirectory(); + DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriter(dir); + TaxonomyReader reader = new DirectoryTaxonomyReader(writer); + + CategoryPath cp = new CategoryPath("a"); + writer.addCategory(cp); + TaxonomyReader newReader = TaxonomyReader.openIfChanged(reader); + assertNotNull("expected a new instance", newReader); + assertEquals(2, newReader.getSize()); + assertNotSame(TaxonomyReader.INVALID_ORDINAL, newReader.getOrdinal(cp)); + reader.close(); + reader = newReader; + + writer.close(); + reader.close(); + + dir.close(); + } + // TODO (Facet): test multiple readers, one writer. Have the multiple readers // using the same object (simulating threads) or different objects // (simulating processes). Index: lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyReader.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyReader.java (revision 1411159) +++ lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyReader.java (working copy) @@ -3,12 +3,11 @@ import java.util.Random; import org.apache.lucene.facet.taxonomy.CategoryPath; -import org.apache.lucene.facet.taxonomy.InconsistentTaxonomyException; import org.apache.lucene.facet.taxonomy.TaxonomyReader; import org.apache.lucene.facet.taxonomy.TaxonomyWriter; -import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader; -import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter; +import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexWriterConfig.OpenMode; +import org.apache.lucene.index.LogMergePolicy; import org.apache.lucene.store.AlreadyClosedException; import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMDirectory; @@ -67,11 +66,8 @@ dir.close(); } - /** - * Test the boolean returned by TR.refresh - */ @Test - public void testReaderRefreshResult() throws Exception { + public void testOpenIfChangedResult() throws Exception { Directory dir = null; DirectoryTaxonomyWriter ltw = null; DirectoryTaxonomyReader ltr = null; @@ -84,13 +80,15 @@ ltw.commit(); ltr = new DirectoryTaxonomyReader(dir); - assertFalse("Nothing has changed",ltr.refresh()); + assertNull("Nothing has changed", TaxonomyReader.openIfChanged(ltr)); ltw.addCategory(new CategoryPath("b")); ltw.commit(); - assertTrue("changes were committed",ltr.refresh()); - assertFalse("Nothing has changed",ltr.refresh()); + TaxonomyReader newtr = TaxonomyReader.openIfChanged(ltr); + assertNotNull("changes were committed", newtr); + assertNull("Nothing has changed", TaxonomyReader.openIfChanged(newtr)); + newtr.close(); } finally { IOUtils.close(ltw, ltr, dir); } @@ -122,11 +120,8 @@ 
doTestReadRecreatedTaxono(random(), true); } - /** - * recreating a taxonomy should work well with a refreshed taxonomy reader - */ @Test - public void testRefreshReadRecreatedTaxonomy() throws Exception { + public void testOpenIfChangedReadRecreatedTaxonomy() throws Exception { doTestReadRecreatedTaxono(random(), false); } @@ -163,13 +158,10 @@ tr.close(); tr = new DirectoryTaxonomyReader(dir); } else { - try { - tr.refresh(); - fail("Expected InconsistentTaxonomyException"); - } catch (InconsistentTaxonomyException e) { - tr.close(); - tr = new DirectoryTaxonomyReader(dir); - } + TaxonomyReader newtr = TaxonomyReader.openIfChanged(tr); + assertNotNull(newtr); + tr.close(); + tr = newtr; } assertEquals("Wrong #categories in taxonomy (i="+i+", k="+k+")", baseNumCategories + 1 + k, tr.getSize()); } @@ -179,14 +171,14 @@ } @Test - public void testRefreshAndRefCount() throws Exception { + public void testOpenIfChangedAndRefCount() throws Exception { Directory dir = new RAMDirectory(); // no need for random directories here DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(dir); taxoWriter.addCategory(new CategoryPath("a")); taxoWriter.commit(); - DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(dir); + TaxonomyReader taxoReader = new DirectoryTaxonomyReader(dir); assertEquals("wrong refCount", 1, taxoReader.getRefCount()); taxoReader.incRef(); @@ -194,12 +186,59 @@ taxoWriter.addCategory(new CategoryPath("a", "b")); taxoWriter.commit(); - taxoReader.refresh(); - assertEquals("wrong refCount", 2, taxoReader.getRefCount()); + TaxonomyReader newtr = TaxonomyReader.openIfChanged(taxoReader); + assertNotNull(newtr); + taxoReader.close(); + taxoReader = newtr; + assertEquals("wrong refCount", 1, taxoReader.getRefCount()); taxoWriter.close(); taxoReader.close(); dir.close(); } + @Test + public void testOpenIfChangedManySegments() throws Exception { + // test openIfChanged() when the taxonomy contains many segments + Directory dir = newDirectory(); + + DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriter(dir) { + @Override + protected IndexWriterConfig createIndexWriterConfig(OpenMode openMode) { + IndexWriterConfig conf = super.createIndexWriterConfig(openMode); + LogMergePolicy lmp = (LogMergePolicy) conf.getMergePolicy(); + lmp.setMergeFactor(2); + return conf; + } + }; + TaxonomyReader reader = new DirectoryTaxonomyReader(writer); + + int numRounds = random().nextInt(10) + 10; + int numCategories = 1; // one for root + for (int i = 0; i < numRounds; i++) { + int numCats = random().nextInt(4) + 1; + for (int j = 0; j < numCats; j++) { + writer.addCategory(new CategoryPath(Integer.toString(i), Integer.toString(j))); + } + numCategories += numCats + 1 /* one for round-parent */; + TaxonomyReader newtr = TaxonomyReader.openIfChanged(reader); + assertNotNull(newtr); + reader.close(); + reader = newtr; + + // assert categories + assertEquals(numCategories, reader.getSize()); + int roundOrdinal = reader.getOrdinal(new CategoryPath(Integer.toString(i))); + int[] parents = reader.getParentArray(); + assertEquals(0, parents[roundOrdinal]); // round's parent is root + for (int j = 0; j < numCats; j++) { + int ord = reader.getOrdinal(new CategoryPath(Integer.toString(i), Integer.toString(j))); + assertEquals(roundOrdinal, parents[ord]); // each category's parent is its round + } + } + + reader.close(); + writer.close(); + dir.close(); + } } Index: lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyWriter.java
=================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyWriter.java (revision 1411159) +++ lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyWriter.java (working copy) @@ -8,7 +8,7 @@ import java.util.concurrent.atomic.AtomicInteger; import org.apache.lucene.facet.taxonomy.CategoryPath; -import org.apache.lucene.facet.taxonomy.InconsistentTaxonomyException; +import org.apache.lucene.facet.taxonomy.TaxonomyReader; import org.apache.lucene.facet.taxonomy.writercache.TaxonomyWriterCache; import org.apache.lucene.facet.taxonomy.writercache.cl2o.Cl2oTaxonomyWriterCache; import org.apache.lucene.facet.taxonomy.writercache.lru.LruTaxonomyWriterCache; @@ -178,12 +178,14 @@ DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE_OR_APPEND, NO_OP_CACHE); touchTaxo(taxoWriter, new CategoryPath("a")); - DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(dir); + TaxonomyReader taxoReader = new DirectoryTaxonomyReader(dir); touchTaxo(taxoWriter, new CategoryPath("b")); - // this should not fail - taxoReader.refresh(); + TaxonomyReader newtr = TaxonomyReader.openIfChanged(taxoReader); + taxoReader.close(); + taxoReader = newtr; + assertEquals(1, Integer.parseInt(taxoReader.getCommitUserData().get(DirectoryTaxonomyWriter.INDEX_EPOCH))); // now recreate the taxonomy, and check that the epoch is preserved after opening DirTW again. taxoWriter.close(); @@ -195,15 +197,12 @@ touchTaxo(taxoWriter, new CategoryPath("d")); taxoWriter.close(); - // this should fail - try { - taxoReader.refresh(); - fail("IconsistentTaxonomyException should have been thrown"); - } catch (InconsistentTaxonomyException e) { - // ok, expected - } - + newtr = TaxonomyReader.openIfChanged(taxoReader); taxoReader.close(); + taxoReader = newtr; + assertEquals(2, Integer.parseInt(taxoReader.getCommitUserData().get(DirectoryTaxonomyWriter.INDEX_EPOCH))); + + taxoReader.close(); dir.close(); } @@ -221,7 +220,7 @@ DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(dir); assertEquals(1, Integer.parseInt(taxoReader.getCommitUserData().get(DirectoryTaxonomyWriter.INDEX_EPOCH))); - taxoReader.refresh(); + assertNull(TaxonomyReader.openIfChanged(taxoReader)); taxoReader.close(); dir.close();
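[Editor's note, not part of the patch] For reviewers migrating code that called TaxonomyReader.refresh(): the patch replaces it with the static TaxonomyReader.openIfChanged(), mirroring the DirectoryReader reopen idiom, and the new DirectoryTaxonomyReader(DirectoryTaxonomyWriter) constructor provides the NRT path. A minimal sketch of both patterns as exercised by the tests above; the variable names (taxoDir, taxoWriter, taxoReader) are illustrative, not from the patch:

    Directory taxoDir = FSDirectory.open(new File("taxo"));
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
    // NRT: open the reader directly on the writer; categories added by the
    // writer become visible through openIfChanged() without a commit().
    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);

    taxoWriter.addCategory(new CategoryPath("a"));

    // The reopen idiom that replaces refresh(): null means nothing changed;
    // otherwise close the old instance and switch to the new one.
    TaxonomyReader newReader = TaxonomyReader.openIfChanged(taxoReader);
    if (newReader != null) {
      taxoReader.close();
      taxoReader = newReader;
    }

Unlike refresh(), which mutated the reader in place and threw InconsistentTaxonomyException on recreate, openIfChanged() handles recreate and leaves the original reader usable until it is explicitly closed.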
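[Editor's note, not part of the patch] The reworked ParentArray.add() documents a grow-on-demand contract: a non-null return value is a replacement instance whose backing array was enlarged. A short sketch of how a caller (per the javadoc, DirectoryTaxonomyWriter) is expected to honor that contract; the parentArray field name is illustrative:

    // Hypothetical caller-side handling of ParentArray.add()'s contract.
    ParentArray grown = parentArray.add(ordinal, parentOrdinal);
    if (grown != null) {
      // the backing array had to grow; use the new instance from now on
      parentArray = grown;
    }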