Index: modules/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyReader.java =================================================================== --- modules/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyReader.java (revision 1202190) +++ modules/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyReader.java (working copy) @@ -1,10 +1,17 @@ package org.apache.lucene.facet.taxonomy.directory; +import java.util.Random; + import org.apache.lucene.facet.taxonomy.CategoryPath; +import org.apache.lucene.facet.taxonomy.InconsistentTaxonomyException; +import org.apache.lucene.facet.taxonomy.TaxonomyReader; +import org.apache.lucene.facet.taxonomy.TaxonomyWriter; import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader; import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter; +import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.store.AlreadyClosedException; import org.apache.lucene.store.Directory; +import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.LuceneTestCase; import org.junit.Test; @@ -77,4 +84,68 @@ dir.close(); } + /** + * recreating a taxonomy should work well with a freshly opened taxonomy reader + */ + @Test + public void testFreshReadRecreatedTaxonomy() throws Exception { + doTestReadRecreatedTaxono(random, true); + } + + /** + * recreating a taxonomy should work well with a refreshed taxonomy reader + */ + @Test + public void testRefreshReadRecreatedTaxonomy() throws Exception { + doTestReadRecreatedTaxono(random, false); + } + + private void doTestReadRecreatedTaxono(Random random, boolean closeReader) throws Exception { + Directory dir = null; + TaxonomyWriter tw = null; + TaxonomyReader tr = null; + + // prepare a few categories + int n = 10; + CategoryPath[] cp = new CategoryPath[n]; + for (int i=0; i getCommitUserData() { //TODO remove this method once adding this to FilterIndexReader 
+ return in.getCommitUserData(); + } } private class InstrumentedIndexWriter extends IndexWriter { int mynum; Index: modules/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyReader.java =================================================================== --- modules/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyReader.java (revision 1202190) +++ modules/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyReader.java (working copy) @@ -120,6 +120,14 @@ * faceted search, the taxonomy reader's refresh() should be called only after * a reopen() of the main index. *

+ * Refreshing the taxonomy might fail in some cases, for example + * if the taxonomy was recreated since this instance was opened or last refreshed. + * In this case an {@link InconsistentTaxonomyException} is thrown, + * suggesting that in order to obtain up-to-date taxonomy data a new + * {@link TaxonomyReader} should be opened. Note: This {@link TaxonomyReader} + * instance remains unchanged and usable in this case, and the application can + * continue to use it, and should still {@link #close()} when no longer needed. + *

* It should be noted that refresh() is similar in purpose to * IndexReader.reopen(), but the two methods behave differently. refresh() * refreshes the existing TaxonomyReader object, rather than opening a new one @@ -130,7 +138,7 @@ * this new snapshots in all threads (whether new or old) is fine. This saves * us needing to keep multiple copies of the taxonomy open in memory. */ - public void refresh() throws IOException; + public void refresh() throws IOException, InconsistentTaxonomyException; /** * getParent() returns the ordinal of the parent category of the category Index: modules/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java =================================================================== --- modules/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java (revision 1202190) +++ modules/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java (working copy) @@ -9,6 +9,7 @@ import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; +import java.util.HashMap; import java.util.Map; import org.apache.lucene.analysis.core.KeywordAnalyzer; @@ -103,6 +104,12 @@ private boolean cacheIsComplete; private IndexReader reader; private int cacheMisses; + + /** + * When a taxonomy is created, we mark that its create time should be committed in the + * next commit. 
+ */ + private String taxoCreateTimeToCommit = null; /** * setDelimiter changes the character that the taxonomy uses in its internal @@ -170,6 +177,10 @@ throws CorruptIndexException, LockObtainFailedException, IOException { + if (!IndexReader.indexExists(directory) || OpenMode.CREATE.equals(openMode)) { + taxoCreateTimeToCommit = Long.toString(System.nanoTime()); + } + indexWriter = openIndexWriter(directory, openMode); reader = null; @@ -278,6 +289,10 @@ @Override public synchronized void close() throws CorruptIndexException, IOException { if (indexWriter != null) { + if (taxoCreateTimeToCommit != null) { + indexWriter.commit(combinedCommitData(null)); + taxoCreateTimeToCommit = null; + } indexWriter.close(); indexWriter = null; } @@ -570,18 +585,40 @@ */ @Override public synchronized void commit() throws CorruptIndexException, IOException { - indexWriter.commit(); + if (taxoCreateTimeToCommit != null) { + indexWriter.commit(combinedCommitData(null)); + taxoCreateTimeToCommit = null; + } else { + indexWriter.commit(); + } refreshReader(); } /** + * Combine original user data with that of the taxonomy creation time + */ + private Map combinedCommitData(Map userData) { + Map m = new HashMap(); + if (userData != null) { + m.putAll(userData); + } + m.put(DirectoryTaxonomyReader.DIR_TAXONOMY_CREATE_TIME_PROP, taxoCreateTimeToCommit); + return m; + } + + /** * Like commit(), but also store properties with the index. These properties * are retrievable by {@link DirectoryTaxonomyReader#getCommitUserData}. * See {@link TaxonomyWriter#commit(Map)}. 
*/ @Override public synchronized void commit(Map commitUserData) throws CorruptIndexException, IOException { - indexWriter.commit(commitUserData); + if (taxoCreateTimeToCommit != null) { + indexWriter.commit(combinedCommitData(commitUserData)); + taxoCreateTimeToCommit = null; + } else { + indexWriter.commit(commitUserData); + } refreshReader(); } @@ -591,7 +628,12 @@ */ @Override public synchronized void prepareCommit() throws CorruptIndexException, IOException { - indexWriter.prepareCommit(); + if (taxoCreateTimeToCommit != null) { + indexWriter.prepareCommit(combinedCommitData(null)); + taxoCreateTimeToCommit = null; + } else { + indexWriter.prepareCommit(); + } } /** @@ -600,7 +642,12 @@ */ @Override public synchronized void prepareCommit(Map commitUserData) throws CorruptIndexException, IOException { - indexWriter.prepareCommit(commitUserData); + if (taxoCreateTimeToCommit != null) { + indexWriter.prepareCommit(combinedCommitData(commitUserData)); + taxoCreateTimeToCommit = null; + } else { + indexWriter.prepareCommit(commitUserData); + } } /** Index: modules/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java =================================================================== --- modules/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java (revision 1202190) +++ modules/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java (working copy) @@ -10,7 +10,9 @@ import java.util.logging.Logger; import org.apache.lucene.facet.taxonomy.CategoryPath; +import org.apache.lucene.facet.taxonomy.InconsistentTaxonomyException; import org.apache.lucene.facet.taxonomy.TaxonomyReader; +import org.apache.lucene.facet.taxonomy.TaxonomyWriter; import org.apache.lucene.facet.taxonomy.directory.Consts.LoadFullPathOnly; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.DocsEnum; @@ -55,6 +57,14 @@ public class DirectoryTaxonomyReader implements 
TaxonomyReader { private static final Logger logger = Logger.getLogger(DirectoryTaxonomyReader.class.getName()); + + /** + * Property name of user commit data that contains the creation time of a taxonomy index. + *

+ Applications making use of {@link TaxonomyWriter#commit(Map)} should not use this + particular property name. + */ + public static final String DIR_TAXONOMY_CREATE_TIME_PROP = "dir.taxo.create.time"; private IndexReader indexReader; @@ -333,7 +343,7 @@ // Note that refresh() is synchronized (it is the only synchronized // method in this class) to ensure that it never gets called concurrently // with itself. - public synchronized void refresh() throws IOException { + public synchronized void refresh() throws IOException, InconsistentTaxonomyException { ensureOpen(); /* * Since refresh() can be a lengthy operation, it is very important that we @@ -349,49 +359,65 @@ // no other thread can be writing at this time (this method is the // only possible writer, and it is "synchronized" to avoid this case). IndexReader r2 = IndexReader.openIfChanged(indexReader); - if (r2 != null) { - IndexReader oldreader = indexReader; - // we can close the old searcher, but need to synchronize this - // so that we don't close it in the middle that another routine - // is reading from it. - indexReaderLock.writeLock().lock(); - indexReader = r2; - indexReaderLock.writeLock().unlock(); - // We can close the old reader, but need to be certain that we - // don't close it while another method is reading from it. - // Luckily, we can be certain of that even without putting the - // oldreader.close() in the locked section. The reason is that - // after lock() succeeded above, we know that all existing readers - // had finished (this is what a read-write lock ensures). New - // readers, starting after the unlock() we just did, already got - // the new indexReader we set above. So nobody can be possibly - // using the old indexReader, and we can close it: - oldreader.close(); + if (r2 == null) { + return; // no changes, nothing to do + } + + // validate that a refresh is valid at this point, i.e. that the taxonomy + // was not recreated since this reader was last opened or refreshed. 
+ String t1 = indexReader.getCommitUserData().get(DIR_TAXONOMY_CREATE_TIME_PROP); + String t2 = r2.getCommitUserData().get(DIR_TAXONOMY_CREATE_TIME_PROP); + if (t1==null) { + if (t2!=null) { + r2.close(); + throw new InconsistentTaxonomyException(); + } + } else if (!t1.equals(t2)) { + r2.close(); + throw new InconsistentTaxonomyException(); + } + + IndexReader oldreader = indexReader; + // we can close the old searcher, but need to synchronize this + // so that we don't close it in the middle that another routine + // is reading from it. + indexReaderLock.writeLock().lock(); + indexReader = r2; + indexReaderLock.writeLock().unlock(); + // We can close the old reader, but need to be certain that we + // don't close it while another method is reading from it. + // Luckily, we can be certain of that even without putting the + // oldreader.close() in the locked section. The reason is that + // after lock() succeeded above, we know that all existing readers + // had finished (this is what a read-write lock ensures). New + // readers, starting after the unlock() we just did, already got + // the new indexReader we set above. So nobody can be possibly + // using the old indexReader, and we can close it: + oldreader.close(); - // We prefetch some of the arrays to make requests much faster. - // Let's refresh these prefetched arrays; This refresh is much - // is made more efficient by assuming that it is enough to read - // the values for new categories (old categories could not have been - // changed or deleted) - // Note that this this done without the write lock being held, - // which means that it is possible that during a refresh(), a - // reader will have some methods (like getOrdinal and getCategory) - // return fresh information, while getParent() - // (only to be prefetched now) still return older information. - // We consider this to be acceptable. 
The important thing, - // however, is that refreshPrefetchArrays() itself writes to - // the arrays in a correct manner (see discussion there) - parentArray.refresh(indexReader); + // We prefetch some of the arrays to make requests much faster. + // Let's refresh these prefetched arrays; This refresh is much + // is made more efficient by assuming that it is enough to read + // the values for new categories (old categories could not have been + // changed or deleted) + // Note that this is done without the write lock being held, + // which means that it is possible that during a refresh(), a + // reader will have some methods (like getOrdinal and getCategory) + // return fresh information, while getParent() + // (only to be prefetched now) still return older information. + // We consider this to be acceptable. The important thing, + // however, is that refreshPrefetchArrays() itself writes to + // the arrays in a correct manner (see discussion there) + parentArray.refresh(indexReader); - // Remove any INVALID_ORDINAL values from the ordinal cache, - // because it is possible those are now answered by the new data! 
+ Iterator> i = ordinalCache.entrySet().iterator(); + while (i.hasNext()) { + Entry e = i.next(); + if (e.getValue().intValue() == INVALID_ORDINAL) { + i.remove(); + } } } Index: modules/facet/src/java/org/apache/lucene/facet/taxonomy/InconsistentTaxonomyException.java =================================================================== --- modules/facet/src/java/org/apache/lucene/facet/taxonomy/InconsistentTaxonomyException.java (revision 0) +++ modules/facet/src/java/org/apache/lucene/facet/taxonomy/InconsistentTaxonomyException.java (working copy) @@ -0,0 +1,32 @@ +package org.apache.lucene.facet.taxonomy; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Exception indicating that a certain operation could not be performed + * on a taxonomy related object because of an inconsistency. + *

+ * For example, trying to refresh a taxonomy reader might fail in case + * the underlying taxonomy was meanwhile modified in a manner which + * does not allow to perform such a refresh. (See {@link TaxonomyReader#refresh()}.) + * + * @lucene.experimental + */ +public class InconsistentTaxonomyException extends Exception { + +} Index: lucene/contrib/CHANGES.txt =================================================================== --- lucene/contrib/CHANGES.txt (revision 1202190) +++ lucene/contrib/CHANGES.txt (working copy) @@ -158,6 +158,13 @@ * LUCENE-3542: Group expanded query terms to preserve parent boolean operator in StandartQueryParser. (Simon Willnauer) + + * LUCENE-3573: TaxonomyReader.refresh() was broken in case that the taxonomy was + recreated since the taxonomy reader was last refreshed or opened. TR.refresh() + now detects this situation and throws an InconsistentTaxonomyException. + When obtaining such an exception the application should open a new taxonomy + reader. As always, the application should close the old taxonomy reader, once + not using it anymore. (Doron Cohen) API Changes