Index: lucene/contrib/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestAddTaxonomies.java
===================================================================
--- lucene/contrib/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestAddTaxonomies.java	(revision 1339027)
+++ lucene/contrib/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestAddTaxonomies.java	(working copy)
@@ -1,254 +0,0 @@
-package org.apache.lucene.facet.taxonomy.directory;
-
-import java.io.File;
-
-import org.apache.lucene.store.Directory;
-import org.junit.Test;
-
-import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util._TestUtil;
-import org.apache.lucene.facet.taxonomy.CategoryPath;
-import org.apache.lucene.facet.taxonomy.TaxonomyReader;
-import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
-import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
-import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.DiskOrdinalMap;
-import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.MemoryOrdinalMap;
-import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.OrdinalMap;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-public class TestAddTaxonomies extends LuceneTestCase {
-
-  @Test
-  public void test1() throws Exception {
-    Directory dir1 = newDirectory();
-    DirectoryTaxonomyWriter tw1 = new DirectoryTaxonomyWriter(dir1);
-    tw1.addCategory(new CategoryPath("Author", "Mark Twain"));
-    tw1.addCategory(new CategoryPath("Animals", "Dog"));
-    Directory dir2 = newDirectory();
-    DirectoryTaxonomyWriter tw2 = new DirectoryTaxonomyWriter(dir2);
-    tw2.addCategory(new CategoryPath("Author", "Rob Pike"));
-    tw2.addCategory(new CategoryPath("Aardvarks", "Bob"));
-    tw2.close();
-    Directory dir3 = newDirectory();
-    DirectoryTaxonomyWriter tw3 = new DirectoryTaxonomyWriter(dir3);
-    tw3.addCategory(new CategoryPath("Author", "Zebra Smith"));
-    tw3.addCategory(new CategoryPath("Aardvarks", "Bob"));
-    tw3.addCategory(new CategoryPath("Aardvarks", "Aaron"));
-    tw3.close();
-
-    MemoryOrdinalMap[] maps = new MemoryOrdinalMap[2];
-    maps[0] = new MemoryOrdinalMap();
-    maps[1] = new MemoryOrdinalMap();
-
-    tw1.addTaxonomies(new Directory[] { dir2, dir3 }, maps);
-    tw1.close();
-
-    TaxonomyReader tr = new DirectoryTaxonomyReader(dir1);
-
-    // Test that the merged taxonomy now contains what we expect:
-    // First all the categories of the original taxonomy, in their original order:
-    assertEquals(tr.getPath(0).toString(), "");
-    assertEquals(tr.getPath(1).toString(), "Author");
-    assertEquals(tr.getPath(2).toString(), "Author/Mark Twain");
-    assertEquals(tr.getPath(3).toString(), "Animals");
-    assertEquals(tr.getPath(4).toString(), "Animals/Dog");
-    // Then the categories new in the new taxonomy, in alphabetical order:
-    assertEquals(tr.getPath(5).toString(), "Aardvarks");
-    assertEquals(tr.getPath(6).toString(), "Aardvarks/Aaron");
-    assertEquals(tr.getPath(7).toString(), "Aardvarks/Bob");
-    assertEquals(tr.getPath(8).toString(), "Author/Rob Pike");
-    assertEquals(tr.getPath(9).toString(), "Author/Zebra Smith");
-    assertEquals(tr.getSize(), 10);
-
-    // Test that the maps contain what we expect
-    int[] map0 = maps[0].getMap();
-    assertEquals(5, map0.length);
-    assertEquals(0, map0[0]);
-    assertEquals(1, map0[1]);
-    assertEquals(8, map0[2]);
-    assertEquals(5, map0[3]);
-    assertEquals(7, map0[4]);
-
-    int[] map1 = maps[1].getMap();
-    assertEquals(6, map1.length);
-    assertEquals(0, map1[0]);
-    assertEquals(1, map1[1]);
-    assertEquals(9, map1[2]);
-    assertEquals(5, map1[3]);
-    assertEquals(7, map1[4]);
-    assertEquals(6, map1[5]);
-
-    tr.close();
-    dir1.close();
-    dir2.close();
-    dir3.close();
-  }
-
-  // a reasonable random test
-  public void testmedium() throws Exception {
-    int numTests = atLeast(3);
-    for (int i = 0; i < numTests; i++) {
-      dotest(_TestUtil.nextInt(random, 1, 10),
-             _TestUtil.nextInt(random, 1, 100),
-             _TestUtil.nextInt(random, 100, 1000),
-             random.nextBoolean());
-    }
-  }
-
-  // A more comprehensive and big random test.
-  @Test @Nightly
-  public void testbig() throws Exception {
-    dotest(2, 1000, 5000, false);
-    dotest(10, 10000, 100, false);
-    dotest(50, 20, 100, false);
-    dotest(10, 1000, 10000, false);
-    dotest(50, 20, 10000, false);
-    dotest(1, 20, 10000, false);
-    dotest(10, 1, 10000, false);
-    dotest(10, 1000, 20000, true);
-  }
-
-  private void dotest(int ntaxonomies, int ncats, int range, boolean disk) throws Exception {
-    Directory dirs[] = new Directory[ntaxonomies];
-    Directory copydirs[] = new Directory[ntaxonomies];
-
-    for (int i=0; i<ntaxonomies; i++) {
-      dirs[i] = newDirectory();
-      copydirs[i] = newDirectory();
-      DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(dirs[i]);
-      DirectoryTaxonomyWriter copytw = new DirectoryTaxonomyWriter(copydirs[i]);
-      for (int j=0; j<ncats; j++) {
-        String cat = Integer.toString(random.nextInt(range));
-        tw.addCategory(new CategoryPath("a", cat));
-        copytw.addCategory(new CategoryPath("a", cat));
-      }
-      tw.close();
-      copytw.close();
-    }
-
-    DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(dirs[0]);
-    Directory otherdirs[] = new Directory[ntaxonomies-1];
-    System.arraycopy(dirs, 1, otherdirs, 0, ntaxonomies-1);
-
-    OrdinalMap[] maps = new OrdinalMap[ntaxonomies-1];
-    if (ntaxonomies>1) {
-      for (int i=0; i<ntaxonomies-1; i++) {
-        maps[i] = disk
-            ? new DiskOrdinalMap(new File(System.getProperty("java.io.tmpdir"), "tmpmap"+i))
-            : new MemoryOrdinalMap();
-      }
-    }
-
-    tw.addTaxonomies(otherdirs, maps);
-    tw.close();
-
-    // Check that the original categories of the main taxonomy remain in
-    // their original positions:
-    TaxonomyReader tr = new DirectoryTaxonomyReader(dirs[0]);
-    TaxonomyReader copytr = new DirectoryTaxonomyReader(copydirs[0]);
-    if (ntaxonomies>1) {
-      assertTrue(tr.getSize() >= copytr.getSize());
-    } else {
-      assertEquals(copytr.getSize(), tr.getSize());
-    }
-    for (int j=0; j<copytr.getSize(); j++) {
-      assertEquals(copytr.getPath(j), tr.getPath(j));
-    }
-    // Check that the categories merged in from the other taxonomies come
-    // after the original ones, in lexicographic order:
-    if (ntaxonomies>1 && tr.getSize() > copytr.getSize()) {
-      String prev = tr.getPath(copytr.getSize()).toString();
-      for (int j=copytr.getSize()+1; j<tr.getSize(); j++) {
-        String n = tr.getPath(j).toString();
-        assertTrue(prev.compareTo(n) < 0);
-        prev = n;
-      }
-    }
-    int oldsize = copytr.getSize(); // remember for later
-    tr.close();
-    copytr.close();
-
-    // Check that all the categories from other taxonomies exist in the new
-    // taxonomy.
-    TaxonomyReader main = new DirectoryTaxonomyReader(dirs[0]);
-    for (int i=1; i<ntaxonomies; i++) {
-      TaxonomyReader other = new DirectoryTaxonomyReader(dirs[i]);
-      int[] map = maps[i-1].getMap();
-      for (int j=0; j<other.getSize(); j++) {
-        int otherord = other.getOrdinal(other.getPath(j));
-        int mainord = main.getOrdinal(other.getPath(j));
-        assertTrue(mainord > 0); // the category must exist in the merged taxonomy
-        assertEquals(mainord, map[otherord]);
-      }
-      other.close();
-    }
-
-    // Check that all the new categories in the merged taxonomy really came
-    // from one of the added taxonomies:
-    for (int j=oldsize; j<main.getSize(); j++) {
-      boolean found = false;
-      for (int i=1; i<ntaxonomies && !found; i++) {
-        TaxonomyReader other = new DirectoryTaxonomyReader(dirs[i]);
-        found = other.getOrdinal(main.getPath(j)) > 0;
-        other.close();
-      }
-      assertTrue("category " + main.getPath(j) + " not found in any source taxonomy", found);
-    }
-    main.close();
-
-    IOUtils.close(dirs);
-    IOUtils.close(copydirs);
-  }
-
-}
Index: lucene/contrib/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestAddTaxonomy.java
===================================================================
--- lucene/contrib/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestAddTaxonomy.java	(revision 0)
+++ lucene/contrib/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestAddTaxonomy.java	(working copy)
+  private void validate(Directory dest, Directory src, OrdinalMap ordMap) throws Exception {
+    DirectoryTaxonomyReader destTR = new DirectoryTaxonomyReader(dest);
+    try {
+      DirectoryTaxonomyReader srcTR = new DirectoryTaxonomyReader(src);
+      try {
+        int[] map = ordMap.getMap();
+        // validate that every source category exists in the destination, at
+        // the ordinal recorded in the ordinal map (ordinal 0 is the root):
+        for (int j = 1; j < srcTR.getSize(); j++) {
+          int destOrdinal = destTR.getOrdinal(srcTR.getPath(j));
+          assertTrue(map[j] > 0);
+          assertEquals(destOrdinal, map[j]);
+        }
+      } finally {
+        srcTR.close();
+      }
+    } finally {
+      destTR.close();
+    }
+  }
+}
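For context on the two OrdinalMap flavors the deleted test exercised (and which the renamed method below still accepts): MemoryOrdinalMap keeps the source-to-destination ordinal mapping in an int[] on the heap, while DiskOrdinalMap spills it to a file, which matters for very large taxonomies. A minimal sketch of choosing between them; the helper class and method below are hypothetical, only the two constructors come from the code in this patch:

    import java.io.File;
    import java.io.IOException;
    import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.DiskOrdinalMap;
    import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.MemoryOrdinalMap;
    import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.OrdinalMap;

    class OrdinalMaps {
      // Hypothetical helper: pick an OrdinalMap implementation, as the
      // deleted dotest() did with its 'disk' flag.
      static OrdinalMap newOrdinalMap(boolean disk) throws IOException {
        return disk
            ? new DiskOrdinalMap(File.createTempFile("ordmap", null)) // mapping spilled to a temp file
            : new MemoryOrdinalMap();                                 // mapping kept on the heap
      }
    }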
Index: lucene/contrib/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
===================================================================
--- lucene/contrib/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java	(revision 1339027)
+++ lucene/contrib/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java	(working copy)
   /**
-   * Take all the categories of one or more given taxonomies, and add them to
-   * the main taxonomy (this), if they are not already there.
-   * <p>
-   * Additionally, fill a mapping for each of the added taxonomies,
-   * mapping its ordinals to the ordinals in the enlarged main taxonomy.
-   * These mappings are saved into an array of OrdinalMap objects given by the
-   * user, one for each of the given taxonomies (not including "this", the main
-   * taxonomy). Often the first of these will be a MemoryOrdinalMap and the
-   * others will be a DiskOrdinalMap - see discussion in {@link OrdinalMap}.
-   * <p>
-   * Note that the taxonomies to be added are given as Directory objects,
-   * not opened TaxonomyReader/TaxonomyWriter objects, so if any of them are
-   * currently managed by an open TaxonomyWriter, make sure to commit() (or
-   * close()) it first. The main taxonomy (this) is an open TaxonomyWriter,
-   * and does not need to be commit()ed before this call.
+   * Takes the categories from the given taxonomy directory, and adds the
+   * missing ones to this taxonomy. Additionally, it fills the given
+   * {@link OrdinalMap} with a mapping from the original ordinal to the new
+   * ordinal.
    */
-  public void addTaxonomies(Directory[] taxonomies, OrdinalMap[] ordinalMaps) throws IOException {
+  public void addTaxonomy(Directory taxoDir, OrdinalMap map) throws IOException {
     ensureOpen();
-    // To prevent us stepping on the rest of this class's decisions on when
-    // to open a reader, and when not, we'll be opening a new reader instead
-    // of using the existing "reader" object:
-    IndexReader mainreader = openReader();
-    TermEnum mainte = mainreader.terms(new Term(Consts.FULL));
-
-    IndexReader[] otherreaders = new IndexReader[taxonomies.length];
-    TermEnum[] othertes = new TermEnum[taxonomies.length];
-    for (int i=0; i<taxonomies.length; i++) {
-      otherreaders[i] = IndexReader.open(taxonomies[i]);
-      othertes[i] = otherreaders[i].terms(new Term(Consts.FULL));
-      // Also tell the ordinal maps their expected sizes:
-      ordinalMaps[i].setSize(otherreaders[i].numDocs());
-    }
-
-    CategoryPath cp = new CategoryPath();
-
-    // We keep a "current" cursor over the alphabetically-ordered list of
-    // categories in each taxonomy, starting at the first category of each:
-    String currentMain = nextTE(mainte);
-    String[] currentOthers = new String[taxonomies.length];
-    int otherTaxonomiesLeft = 0;
-    for (int i=0; i<taxonomies.length; i++) {
-      currentOthers[i] = nextTE(othertes[i]);
-      if (currentOthers[i] != null) {
-        otherTaxonomiesLeft++;
-      }
-    }
-
-    while (otherTaxonomiesLeft>0) {
-      String first=null;
-      for (int i=0; i<taxonomies.length; i++) {
-        if (currentOthers[i]==null) continue;
-        if (first==null || first.compareTo(currentOthers[i])>0) {
-          first = currentOthers[i];
-        }
-      }
-      int comp = 0;
-      if (currentMain==null || (comp = currentMain.compareTo(first))>0) {
-        // If 'first' is before currentMain, or currentMain is null,
-        // then 'first' is a new category and we need to add it to the
-        // main taxonomy. Then for all taxonomies with this 'first'
-        // category, we need to add the new category number to their
-        // map, and move to the next category in all of them.
-        cp.clear();
-        cp.add(first, delimiter);
-        // We can call internalAddCategory() instead of addCategory()
-        // because we know the category hasn't been seen yet.
-        int newordinal = internalAddCategory(cp, cp.length());
-        // TODO (Facet): we already had this term in our hands before, in nextTE...
-        Term t = new Term(Consts.FULL, first);
-        for (int i=0; i<taxonomies.length; i++) {
-          if (first.equals(currentOthers[i])) {
-            // remember the remapping of this ordinal, and advance this
-            // taxonomy to its next category:
-            TermDocs td = otherreaders[i].termDocs(t);
-            td.next();
-            ordinalMaps[i].addMapping(td.doc(), newordinal);
-            currentOthers[i] = nextTE(othertes[i]);
-            if (currentOthers[i]==null) {
-              otherTaxonomiesLeft--;
-            }
-          }
-        }
-      } else if (comp==0) {
-        // 'first' and currentMain are the same category. It already exists
-        // in the main taxonomy, so only the cursors need to be advanced and
-        // the existing ordinal recorded in the maps:
-        Term t = new Term(Consts.FULL, first);
-        TermDocs td = mainreader.termDocs(t);
-        td.next();
-        int newordinal = td.doc();
-        currentMain = nextTE(mainte);
-        for (int i=0; i<taxonomies.length; i++) {
-          if (first.equals(currentOthers[i])) {
-            td = otherreaders[i].termDocs(t);
-            td.next();
-            ordinalMaps[i].addMapping(td.doc(), newordinal);
-            currentOthers[i] = nextTE(othertes[i]);
-            if (currentOthers[i]==null) {
-              otherTaxonomiesLeft--;
-            }
-          }
-        }
-      } else /* comp > 0 */ {
-        // The currentMain doesn't appear in any of the other taxonomies -
-        // we don't need to do anything, just continue to the next one
-        currentMain = nextTE(mainte);
-      }
-    }
-
-    // Close all the readers we've opened, and also tell the ordinal maps
-    // we're done adding to them
-    mainreader.close();
-    for (int i=0; i<taxonomies.length; i++) {
-      otherreaders[i].close();
-      ordinalMaps[i].addMapping(0, 0);
-      ordinalMaps[i].addDone();
-    }
+    IndexReader r = IndexReader.open(taxoDir);
+    try {
+      final int size = r.numDocs();
+      final OrdinalMap ordinalMap = map;
+      ordinalMap.setSize(size);
+      CategoryPath cp = new CategoryPath();
+      TermEnum te = r.terms(Consts.FULL_TERM);
+      TermDocs docs = r.termDocs();
+      // we call next() first, to skip the root category which always exists.
+      while (te.next()) {
+        Term term = te.term();
+        if (term.field() != Consts.FULL) break;
+        cp.clear();
+        cp.add(term.text(), delimiter);
+        final int ordinal = addCategory(cp);
+        docs.seek(term);
+        docs.next();
+        ordinalMap.addMapping(docs.doc(), ordinal);
+      }
+      // we must add the root ordinal map, so that the map will be complete
+      // (otherwise e.g. DiskOrdinalMap may fail because it expects more
+      // categories to exist in the file).
+      ordinalMap.addMapping(0, 0);
+      ordinalMap.addDone();
+    } finally {
+      r.close();
+    }
+  }
Index: lucene/contrib/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java
===================================================================
--- lucene/contrib/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java	(revision 1339027)
+++ lucene/contrib/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java	(working copy)
       if (catID<0 || catID>=indexReader.maxDoc()) {
         return null;
       }
-      ret = indexReader.document(catID, Consts.fullPathSelector)
-          .get(Consts.FULL);
+      ret = indexReader.document(catID, Consts.fullPathSelector).get(Consts.FULL);
     } finally {
       indexReaderLock.readLock().unlock();
     }
Index: lucene/contrib/facet/src/java/org/apache/lucene/facet/taxonomy/directory/Consts.java
===================================================================
--- lucene/contrib/facet/src/java/org/apache/lucene/facet/taxonomy/directory/Consts.java	(revision 1339027)
+++ lucene/contrib/facet/src/java/org/apache/lucene/facet/taxonomy/directory/Consts.java	(working copy)
@@ -2,6 +2,7 @@
 
 import org.apache.lucene.document.FieldSelector;
 import org.apache.lucene.document.FieldSelectorResult;
+import org.apache.lucene.index.Term;
 
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -26,6 +27,12 @@
 abstract class Consts {
 
   static final String FULL = "$full_path$";
+  /**
+   * A {@link #FULL} {@link Term} - use it for creating new terms instead of
+   * calling {@link Term#Term(String, String)}, by calling
+   * {@link Term#createTerm(String)} -- the latter does not do string interning.
+   */
+  static final Term FULL_TERM = new Term(FULL);
   static final String FIELD_PAYLOADS = "$payloads$";
   static final String PAYLOAD_PARENT = "p";
   static final char[] PAYLOAD_PARENT_CHARS = PAYLOAD_PARENT.toCharArray();
Index: lucene/contrib/facet/src/examples/org/apache/lucene/facet/example/merge/TaxonomyMergeUtils.java
===================================================================
--- lucene/contrib/facet/src/examples/org/apache/lucene/facet/example/merge/TaxonomyMergeUtils.java	(revision 1339027)
+++ lucene/contrib/facet/src/examples/org/apache/lucene/facet/example/merge/TaxonomyMergeUtils.java	(working copy)
@@ -81,7 +81,7 @@
       OrdinalMap map, IndexWriter destIndexWriter,
       DirectoryTaxonomyWriter destTaxWriter) throws IOException {
     // merge the taxonomies
-    destTaxWriter.addTaxonomies(new Directory[] { srcTaxDir }, new OrdinalMap[] { map });
+    destTaxWriter.addTaxonomy(srcTaxDir, map);
     PayloadProcessorProvider payloadProcessor = new FacetsPayloadProcessorProvider(
         srcIndexDir, map.getMap(), new DefaultFacetIndexingParams());
Index: lucene/contrib/CHANGES.txt
===================================================================
--- lucene/contrib/CHANGES.txt	(revision 1339027)
+++ lucene/contrib/CHANGES.txt	(working copy)
@@ -25,6 +25,11 @@
    wrongly used minimum sort buffer resulting in potentially slow FST &
    Dictionary building. (Simon Willnauer)
 
+ * LUCENE-4060: Fix a synchronization bug in
+   DirectoryTaxonomyWriter.addTaxonomies(). Also, the method has been renamed to
+   addTaxonomy and now takes only one Directory and one OrdinalMap.
+   (Shai Erera, Gilad Barkai)
+
 Tests
 
  * LUCENE-3919, LUCENE-3969, LUCENE-3990: more thorough testing of analysis
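Taken together with the TaxonomyMergeUtils change above, migrating callers is mechanical: one Directory and one OrdinalMap per call, instead of parallel arrays. A minimal end-to-end sketch of the renamed API (the directory paths are illustrative assumptions, and the source directory is assumed to already contain a taxonomy):

    import java.io.File;
    import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
    import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.MemoryOrdinalMap;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;

    public class AddTaxonomyExample {
      public static void main(String[] args) throws Exception {
        Directory srcTaxDir = FSDirectory.open(new File("src-taxo"));   // assumed path
        Directory destTaxDir = FSDirectory.open(new File("dest-taxo")); // assumed path
        DirectoryTaxonomyWriter destTaxWriter = new DirectoryTaxonomyWriter(destTaxDir);
        MemoryOrdinalMap map = new MemoryOrdinalMap();
        // single source directory + single map, replacing the old
        // addTaxonomies(new Directory[] {...}, new OrdinalMap[] {...}) call:
        destTaxWriter.addTaxonomy(srcTaxDir, map);
        destTaxWriter.close();
        // map.getMap()[srcOrdinal] is the matching ordinal in the merged
        // taxonomy, e.g. for rewriting facet payloads the way
        // FacetsPayloadProcessorProvider does in TaxonomyMergeUtils:
        int[] ordinals = map.getMap();
        System.out.println("remapped " + ordinals.length + " source ordinals");
        srcTaxDir.close();
        destTaxDir.close();
      }
    }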