Index: lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyWriter.java
===================================================================
--- lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyWriter.java	(revision 1405777)
+++ lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyWriter.java	(working copy)
@@ -96,7 +96,7 @@
     Map<String, String> readUserCommitData = r.getIndexCommit().getUserData();
     assertTrue("wrong value extracted from commit data",
         "1 2 3".equals(readUserCommitData.get("testing")));
-    assertNotNull("index.create.time not found in commitData", readUserCommitData.get(DirectoryTaxonomyWriter.INDEX_CREATE_TIME));
+    assertNotNull("index.epoch not found in commitData", readUserCommitData.get(DirectoryTaxonomyWriter.INDEX_EPOCH));
     r.close();
     
     // open DirTaxoWriter again and commit, INDEX_CREATE_TIME should still exist
@@ -111,7 +111,7 @@
 
     r = DirectoryReader.open(dir);
     readUserCommitData = r.getIndexCommit().getUserData();
-    assertNotNull("index.create.time not found in commitData", readUserCommitData.get(DirectoryTaxonomyWriter.INDEX_CREATE_TIME));
+    assertNotNull("index.epoch not found in commitData", readUserCommitData.get(DirectoryTaxonomyWriter.INDEX_EPOCH));
     r.close();
     
     dir.close();
@@ -267,10 +267,10 @@
     dir.close();
   }
   
-  private String getCreateTime(Directory taxoDir) throws IOException {
+  private long getEpoch(Directory taxoDir) throws IOException {
     SegmentInfos infos = new SegmentInfos();
     infos.read(taxoDir);
-    return infos.getUserData().get(DirectoryTaxonomyWriter.INDEX_CREATE_TIME);
+    return Long.parseLong(infos.getUserData().get(DirectoryTaxonomyWriter.INDEX_EPOCH));
   }
   
   @Test
@@ -286,7 +286,7 @@
     taxoWriter.addCategory(new CategoryPath("c"));
     taxoWriter.commit();
     
-    String origCreateTime = getCreateTime(dir);
+    long origEpoch = getEpoch(dir);
     
     // replace the taxonomy with the input one
     taxoWriter.replaceTaxonomy(input);
@@ -298,8 +298,8 @@
 
     taxoWriter.close();
     
-    String newCreateTime = getCreateTime(dir);
-    assertNotSame("create time should have been changed after replaceTaxonomy", origCreateTime, newCreateTime);
+    long newEpoch = getEpoch(dir);
+    assertTrue("index epoch should have been updated after replaceTaxonomy", origEpoch < newEpoch);
     
     dir.close();
     input.close();
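For illustration, a minimal standalone sketch of the behavior the tests above verify: recreating a taxonomy with OpenMode.CREATE bumps the epoch recorded in the commit user data. This snippet is not part of the patch; the class name EpochDemo and the helper readEpoch are invented here, and it assumes the 4.x APIs the test itself uses.

import java.util.Map;

import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

public class EpochDemo {

  // Reads the epoch the taxonomy writer recorded in the last commit's user data.
  static String readEpoch(Directory dir) throws Exception {
    DirectoryReader r = DirectoryReader.open(dir);
    try {
      Map<String, String> userData = r.getIndexCommit().getUserData();
      return userData.get(DirectoryTaxonomyWriter.INDEX_EPOCH);
    } finally {
      r.close();
    }
  }

  public static void main(String[] args) throws Exception {
    Directory dir = new RAMDirectory();

    // Create the taxonomy and commit; the writer stores its epoch on close().
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE);
    taxoWriter.addCategory(new CategoryPath("a"));
    taxoWriter.close();
    String epoch1 = readEpoch(dir);

    // Recreate the taxonomy: the epoch is incremented.
    taxoWriter = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE);
    taxoWriter.close();
    String epoch2 = readEpoch(dir);

    System.out.println(epoch1 + " -> " + epoch2); // epoch2 is greater than epoch1

    dir.close();
  }
}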
Index: lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java
===================================================================
--- lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java	(revision 1405777)
+++ lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java	(working copy)
@@ -174,6 +174,7 @@
     this.delimiter = delimiter;
   }
 
+  @Override
   public int getOrdinal(CategoryPath categoryPath) throws IOException {
     ensureOpen();
     if (categoryPath.length()==0) {
@@ -218,6 +219,7 @@
     return ret;
   }
 
+  @Override
   public CategoryPath getPath(int ordinal) throws IOException {
     ensureOpen();
     // TODO (Facet): Currently, the LRU cache we use (getCategoryCache) holds
@@ -235,6 +237,7 @@
     return new CategoryPath(label, delimiter);
   }
 
+  @Override
   public boolean getPath(int ordinal, CategoryPath result) throws IOException {
     ensureOpen();
     String label = getLabel(ordinal);
@@ -296,6 +299,7 @@
     return ret;
   }
 
+  @Override
   public int getParent(int ordinal) {
     ensureOpen();
     // Note how we don't need to hold the read lock to do the following,
@@ -327,6 +331,7 @@
    * so you should always call getParentArray() again after a refresh().
    */
+  @Override
   public int[] getParentArray() {
     ensureOpen();
     // Note how we don't need to hold the read lock to do the following,
@@ -339,6 +344,7 @@
   // Note that refresh() is synchronized (it is the only synchronized
   // method in this class) to ensure that it never gets called concurrently
   // with itself.
+  @Override
   public synchronized boolean refresh() throws IOException, InconsistentTaxonomyException {
     ensureOpen();
     /*
@@ -361,63 +367,66 @@
 
     // validate that a refresh is valid at this point, i.e. that the taxonomy
     // was not recreated since this reader was last opened or refresshed.
-    String t1 = indexReader.getIndexCommit().getUserData().get(DirectoryTaxonomyWriter.INDEX_CREATE_TIME);
-    String t2 = r2.getIndexCommit().getUserData().get(DirectoryTaxonomyWriter.INDEX_CREATE_TIME);
-    if (t1==null) {
-      if (t2!=null) {
+    String t1 = indexReader.getIndexCommit().getUserData().get(DirectoryTaxonomyWriter.INDEX_EPOCH);
+    String t2 = r2.getIndexCommit().getUserData().get(DirectoryTaxonomyWriter.INDEX_EPOCH);
+    if (t1 == null) {
+      if (t2 != null) {
         r2.close();
-        throw new InconsistentTaxonomyException("Taxonomy was recreated at: "+t2);
+        throw new InconsistentTaxonomyException("Taxonomy was recreated, epoch=" + t2);
       }
     } else if (!t1.equals(t2)) {
+      // t1 != null, and t2 cannot be null b/c DirTaxoWriter always puts the commit data.
+      // it's ok to use String.equals here because we require the two epoch values to be identical.
       r2.close();
-      throw new InconsistentTaxonomyException("Taxonomy was recreated at: "+t2+" != "+t1);
+      throw new InconsistentTaxonomyException("Taxonomy was recreated, epoch=" + t2 + " != " + t1);
     }
-      IndexReader oldreader = indexReader;
-      // we can close the old searcher, but need to synchronize this
-      // so that we don't close it in the middle that another routine
-      // is reading from it.
-      indexReaderLock.writeLock().lock();
-      indexReader = r2;
-      indexReaderLock.writeLock().unlock();
-      // We can close the old reader, but need to be certain that we
-      // don't close it while another method is reading from it.
-      // Luckily, we can be certain of that even without putting the
-      // oldreader.close() in the locked section. The reason is that
-      // after lock() succeeded above, we know that all existing readers
-      // had finished (this is what a read-write lock ensures). New
-      // readers, starting after the unlock() we just did, already got
-      // the new indexReader we set above. So nobody can be possibly
-      // using the old indexReader, and we can close it:
-      oldreader.close();
-      
-      // We prefetch some of the arrays to make requests much faster.
-      // Let's refresh these prefetched arrays; This refresh is much
-      // is made more efficient by assuming that it is enough to read
-      // the values for new categories (old categories could not have been
-      // changed or deleted)
-      // Note that this this done without the write lock being held,
-      // which means that it is possible that during a refresh(), a
-      // reader will have some methods (like getOrdinal and getCategory)
-      // return fresh information, while getParent()
-      // (only to be prefetched now) still return older information.
-      // We consider this to be acceptable. The important thing,
-      // however, is that refreshPrefetchArrays() itself writes to
-      // the arrays in a correct manner (see discussion there)
-      parentArray.refresh(indexReader);
-      
-      // Remove any INVALID_ORDINAL values from the ordinal cache,
-      // because it is possible those are now answered by the new data!
-      Iterator<Entry<String, Integer>> i = ordinalCache.entrySet().iterator();
-      while (i.hasNext()) {
-        Entry<String, Integer> e = i.next();
-        if (e.getValue().intValue() == INVALID_ORDINAL) {
-          i.remove();
-        }
-      }
-      return true;
-    }
+    IndexReader oldreader = indexReader;
+    // we can close the old reader, but need to synchronize this
+    // so that we don't close it while another routine
+    // is reading from it.
+    indexReaderLock.writeLock().lock();
+    indexReader = r2;
+    indexReaderLock.writeLock().unlock();
+    // We can close the old reader, but need to be certain that we
+    // don't close it while another method is reading from it.
+    // Luckily, we can be certain of that even without putting the
+    // oldreader.close() in the locked section. The reason is that
+    // after lock() succeeded above, we know that all existing readers
+    // had finished (this is what a read-write lock ensures). New
+    // readers, starting after the unlock() we just did, already got
+    // the new indexReader we set above. So nobody can possibly be
+    // using the old indexReader, and we can close it:
+    oldreader.close();
+    
+    // We prefetch some of the arrays to make requests much faster.
+    // Let's refresh these prefetched arrays; this refresh is made much
+    // more efficient by assuming that it is enough to read the values
+    // for new categories (old categories could not have been changed
+    // or deleted).
+    // Note that this is done without the write lock being held,
+    // which means that it is possible that during a refresh(), a
+    // reader will have some methods (like getOrdinal and getCategory)
+    // return fresh information, while getParent()
+    // (only to be prefetched now) still returns older information.
+    // We consider this to be acceptable. The important thing,
+    // however, is that refreshPrefetchArrays() itself writes to
+    // the arrays in a correct manner (see discussion there).
+    parentArray.refresh(indexReader);
+    
+    // Remove any INVALID_ORDINAL values from the ordinal cache,
+    // because it is possible those are now answered by the new data!
+    Iterator<Entry<String, Integer>> i = ordinalCache.entrySet().iterator();
+    while (i.hasNext()) {
+      Entry<String, Integer> e = i.next();
+      if (e.getValue().intValue() == INVALID_ORDINAL) {
+        i.remove();
+      }
+    }
+    return true;
+  }
 
+  @Override
   public void close() throws IOException {
     if (!closed) {
       synchronized (this) {
@@ -440,6 +449,7 @@
     ordinalCache.clear();
   }
 
+  @Override
   public int getSize() {
     ensureOpen();
     indexReaderLock.readLock().lock();
@@ -450,6 +460,7 @@
     }
   }
 
+  @Override
   public Map<String, String> getCommitUserData() throws IOException {
     ensureOpen();
     return indexReader.getIndexCommit().getUserData();
@@ -458,6 +469,7 @@
   private ChildrenArrays childrenArrays;
   Object childrenArraysRebuild = new Object();
 
+  @Override
   public ChildrenArrays getChildrenArrays() {
     ensureOpen();
     // Check if the taxonomy grew since we built the array, and if it
@@ -543,9 +555,11 @@
       this.youngestChildArray = youngestChildArray;
       this.olderSiblingArray = olderSiblingArray;
     }
+    @Override
     public int[] getOlderSiblingArray() {
       return olderSiblingArray;
     }
+    @Override
     public int[] getYoungestChildArray() {
       return youngestChildArray;
     }
@@ -567,6 +581,7 @@
    * Expert: decreases the refCount of this TaxonomyReader instance. If the
    * refCount drops to 0, then this reader is closed.
    */
+  @Override
   public void decRef() throws IOException {
     ensureOpen();
     final int rc = refCount.decrementAndGet();
@@ -587,6 +602,7 @@
   }
 
   /** Expert: returns the current refCount for this taxonomy reader */
+  @Override
   public int getRefCount() {
     return refCount.get();
   }
@@ -598,6 +614,7 @@
    * Be sure to always call a corresponding decRef(), in a finally clause;
    * otherwise the reader may never be closed.
    */
+  @Override
   public void incRef() {
     ensureOpen();
     refCount.incrementAndGet();
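For context, a hypothetical sketch (not part of the patch) of the calling pattern the epoch check above enables: when refresh() throws InconsistentTaxonomyException, the taxonomy was recreated under a different epoch, so an in-place refresh is invalid and the caller must open a fresh reader. The class name TaxonomyRefresher and the method maybeRefresh are invented here; the Lucene 4.x APIs used are the ones appearing in this patch.

import java.io.IOException;

import org.apache.lucene.facet.taxonomy.InconsistentTaxonomyException;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
import org.apache.lucene.store.Directory;

public class TaxonomyRefresher {

  private final Directory taxoDir;
  private TaxonomyReader taxoReader;

  public TaxonomyRefresher(Directory taxoDir) throws IOException {
    this.taxoDir = taxoDir;
    this.taxoReader = new DirectoryTaxonomyReader(taxoDir);
  }

  public void maybeRefresh() throws IOException {
    try {
      taxoReader.refresh(); // returns true if anything changed
    } catch (InconsistentTaxonomyException e) {
      // The epochs of the two commits differ: the taxonomy was recreated,
      // so an in-place refresh is invalid; replace the reader instead.
      TaxonomyReader oldReader = taxoReader;
      taxoReader = new DirectoryTaxonomyReader(taxoDir);
      oldReader.close();
    }
  }
}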
Index: lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
===================================================================
--- lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java	(revision 1405777)
+++ lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java	(working copy)
@@ -86,23 +86,24 @@
  * @lucene.experimental
  */
 public class DirectoryTaxonomyWriter implements TaxonomyWriter {
-  
+
   /**
-   * Property name of user commit data that contains the creation time of a
-   * taxonomy index.
+   * Property name of user commit data that contains the index epoch. The epoch
+   * changes whenever the taxonomy is recreated (i.e. opened with
+   * {@link OpenMode#CREATE}).
    * <p>
    * Applications should not use this property in their commit data because it
    * will be overridden by this taxonomy writer.
    */
-  public static final String INDEX_CREATE_TIME = "index.create.time";
-  
+  public static final String INDEX_EPOCH = "index.epoch";
+
   private final Directory dir;
   private final IndexWriter indexWriter;
   private final TaxonomyWriterCache cache;
   private final AtomicInteger cacheMisses = new AtomicInteger(0);
 
-  /** Records the taxonomy index creation time, updated on replaceTaxonomy as well. */
-  private String createTime;
+  // Records the taxonomy index epoch, updated on replaceTaxonomy as well.
+  private long indexEpoch;
 
   private char delimiter = Consts.DEFAULT_DELIMITER;
   private SinglePositionTokenStream parentStream = new SinglePositionTokenStream(Consts.PAYLOAD_PARENT);
@@ -200,28 +201,34 @@
   public DirectoryTaxonomyWriter(Directory directory, OpenMode openMode,
       TaxonomyWriterCache cache) throws IOException {
 
-    if (!DirectoryReader.indexExists(directory) || openMode==OpenMode.CREATE) {
-      createTime = Long.toString(System.nanoTime());
+    dir = directory;
+    IndexWriterConfig config = createIndexWriterConfig(openMode);
+    indexWriter = openIndexWriter(dir, config);
+
+    // verify (to some extent) that merge policy in effect would preserve category docids
+    assert !(indexWriter.getConfig().getMergePolicy() instanceof TieredMergePolicy) :
+      "for preserving category docids, merging non-adjacent segments is not allowed";
+
+    // after we opened the writer, and the index is locked, it's safe to check
+    // the commit data and read the index epoch
+    openMode = config.getOpenMode();
+    if (!DirectoryReader.indexExists(directory)) {
+      indexEpoch = 1;
     } else {
+      String epochStr = null;
       Map<String, String> commitData = readCommitData(directory);
       if (commitData != null) {
-        // It is ok if an existing index doesn't have commitData, or the
-        // INDEX_CREATE_TIME property. If ever it will be recreated, we'll set
-        // createTime accordingly in the above 'if'.
-        createTime = commitData.get(INDEX_CREATE_TIME);
-      } else {
-        createTime = null;
+        epochStr = commitData.get(INDEX_EPOCH);
       }
+      // No commit data, or no epoch in it, means an old taxonomy, so set its
+      // epoch to 1, for lack of a better value.
+      indexEpoch = epochStr == null ? 1 : Long.parseLong(epochStr);
     }
-
-    dir = directory;
-    IndexWriterConfig config = createIndexWriterConfig(openMode);
-    indexWriter = openIndexWriter(dir, config);
+
+    if (openMode == OpenMode.CREATE) {
+      ++indexEpoch;
+    }
 
-    // verify (to some extent) that merge policy in effect would preserve category docids
-    assert !(indexWriter.getConfig().getMergePolicy() instanceof TieredMergePolicy) :
-      "for preserving category docids, merging none-adjacent segments is not allowed";
-
     FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
     ft.setOmitNorms(true);
     parentStreamField = new Field(Consts.FIELD_PAYLOADS, parentStream, ft);
@@ -670,9 +677,7 @@
     if (userData != null) {
       m.putAll(userData);
     }
-    if (createTime != null) {
-      m.put(INDEX_CREATE_TIME, createTime);
-    }
+    m.put(INDEX_EPOCH, Long.toString(indexEpoch));
     return m;
   }
@@ -1022,8 +1027,8 @@
     cacheIsComplete = false;
     shouldFillCache = true;
     
-    // update createTime as a taxonomy replace is just like it has be recreated
-    createTime = Long.toString(System.nanoTime());
+    // update indexEpoch, as replacing the taxonomy is just like recreating it
+    ++indexEpoch;
   }
 
   /** Returns the {@link Directory} of this taxonomy writer. */
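To make the combinedCommitData() change above concrete: user-supplied commit data is preserved, but INDEX_EPOCH is always overridden by the writer, as its javadoc warns. A hypothetical snippet (not part of the patch); the class name CommitDataDemo is invented here, and it assumes the 4.x TaxonomyWriter.commit(Map) variant used by the test at the top of this patch.

import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.store.Directory;

public class CommitDataDemo {
  // Commits user data and shows that INDEX_EPOCH remains writer-owned.
  static void demo(DirectoryTaxonomyWriter taxoWriter, Directory taxoDir) throws Exception {
    Map<String, String> userData = new HashMap<String, String>();
    userData.put("testing", "1 2 3");
    userData.put(DirectoryTaxonomyWriter.INDEX_EPOCH, "bogus"); // will be overridden
    taxoWriter.commit(userData);

    DirectoryReader r = DirectoryReader.open(taxoDir);
    Map<String, String> commitData = r.getIndexCommit().getUserData();
    System.out.println(commitData.get("testing"));                           // "1 2 3"
    System.out.println(commitData.get(DirectoryTaxonomyWriter.INDEX_EPOCH)); // the writer's epoch, not "bogus"
    r.close();
  }
}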
Index: lucene/CHANGES.txt
===================================================================
--- lucene/CHANGES.txt	(revision 1405777)
+++ lucene/CHANGES.txt	(working copy)
@@ -337,6 +337,11 @@
   exception in various places, on reuse of a segment name after crash and
   recovery.  (Uwe Schindler, Robert Muir, Mike McCandless)
 
+* LUCENE-4532: DirectoryTaxonomyWriter used a timestamp to denote taxonomy
+  index re-creation, which could cause bugs when machine clocks were not
+  synced. Instead, it now tracks an 'epoch' version, which is incremented
+  whenever the taxonomy is re-created or replaced. (Shai Erera)
+
 Optimizations
 
 * LUCENE-4322: Decrease lucene-core JAR size. The core JAR size had increased a