Index: lucene/CHANGES.txt =================================================================== --- lucene/CHANGES.txt (revision 944220) +++ lucene/CHANGES.txt (working copy) @@ -74,6 +74,11 @@ it cannot delete the lock file, since obtaining the lock does not fail if the file is there. (Shai Erera) +* LUCENE-2455: IndexWriter.addIndexes no longer optimizes the target index + before it adds the new ones. Also, the existing segments are not merged and so + the index will not end up with a single segment (unless it was empty before). + (Shai Erera) + API Changes * LUCENE-2076: Rename FSDirectory.getFile -> getDirectory. (George @@ -161,6 +166,10 @@ commit points when they are not needed anymore (instead of waiting for the next commit). (Shai Erera) +* LUCENE-2455: IndexWriter.addIndexesNoOptimize was deprecated in favor of + addIndexes(Directory...). The method operates the same; it was just renamed. + (Shai Erera) + Bug fixes * LUCENE-2119: Don't throw NegativeArraySizeException if you pass Index: lucene/contrib/misc/src/java/org/apache/lucene/misc/IndexMergeTool.java =================================================================== --- lucene/contrib/misc/src/java/org/apache/lucene/misc/IndexMergeTool.java (revision 944220) +++ lucene/contrib/misc/src/java/org/apache/lucene/misc/IndexMergeTool.java (working copy) @@ -49,7 +49,7 @@ } System.out.println("Merging..."); - writer.addIndexesNoOptimize(indexes); + writer.addIndexes(indexes); System.out.println("Optimizing..."); writer.optimize(); Index: lucene/src/java/org/apache/lucene/index/IndexWriter.java =================================================================== --- lucene/src/java/org/apache/lucene/index/IndexWriter.java (revision 944220) +++ lucene/src/java/org/apache/lucene/index/IndexWriter.java (working copy) @@ -3039,26 +3039,34 @@ private void noDupDirs(Directory... dirs) { HashSet dups = new HashSet(); - for(int i=0;iThis may be used to parallelize batch indexing. A large document - * collection can be broken into sub-collections. Each sub-collection can be - * indexed in parallel, on a different thread, process or machine. The + *

This may be used to parallelize batch indexing. A large document + * collection can be broken into sub-collections. Each sub-collection can be + * indexed in parallel, on a different thread, process or machine. The * complete index can then be created by merging sub-collection indexes * with this method. * - *

NOTE: the index in each Directory must not be + *

+ * NOTE: the index in each {@link Directory} must not be * changed (opened by a writer) while this method is * running. This method does not acquire a write lock in * each input Directory, so it is up to the caller to @@ -3072,35 +3080,32 @@ * handled: it does not commit a new segments_N file until * all indexes are added. This means if an Exception * occurs (for example disk full), then either no indexes - * will have been added or they all will have been.

+ * will have been added or they all will have been. * *

Note that this requires temporary free space in the - * Directory up to 2X the sum of all input indexes - * (including the starting index). If readers/searchers + * {@link Directory} up to 2X the sum of all input indexes + * (including the starting index). If readers/searchers * are open against the starting index, then temporary * free space required will be higher by the size of the * starting index (see {@link #optimize()} for details). - *

* *

Once this completes, the final size of the index * will be less than the sum of all input index sizes - * (including the starting index). It could be quite a + * (including the starting index). It could be quite a * bit smaller (if there were many pending deletes) or - * just slightly smaller.

+ * just slightly smaller. * + *

This requires this index not be among those to be added. + * *

- * This requires this index not be among those to be added. + * NOTE: if this method hits an OutOfMemoryError + * you should immediately close the writer. See above for details. * - *

NOTE: if this method hits an OutOfMemoryError - * you should immediately close the writer. See above for details.

- * * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ - public void addIndexesNoOptimize(Directory... dirs) - throws CorruptIndexException, IOException { - + public void addIndexes(Directory... dirs) throws CorruptIndexException, IOException { ensureOpen(); noDupDirs(dirs); @@ -3110,7 +3115,7 @@ try { if (infoStream != null) - message("flush at addIndexesNoOptimize"); + message("flush at addIndexes(Directory...)"); flush(true, false, true); boolean success = false; @@ -3123,16 +3128,10 @@ synchronized(this) { ensureOpen(); - for (int i = 0; i < dirs.length; i++) { - if (directory == dirs[i]) { - // cannot add this index: segments may be deleted in merge before added - throw new IllegalArgumentException("Cannot add this index to itself"); - } - + for (Directory dir : dirs) { SegmentInfos sis = new SegmentInfos(); // read infos from dir - sis.read(dirs[i]); - for (int j = 0; j < sis.size(); j++) { - SegmentInfo info = sis.info(j); + sis.read(dir); + for (SegmentInfo info : sis) { assert !segmentInfos.contains(info): "dup info dir=" + info.dir + " name=" + info.name; docCount += info.docCount; segmentInfos.add(info); // add each info @@ -3166,7 +3165,7 @@ } } } catch (OutOfMemoryError oom) { - handleOOM(oom, "addIndexesNoOptimize"); + handleOOM(oom, "addIndexes(Directory...)"); } finally { if (docWriter != null) { docWriter.resumeAllThreads(); @@ -3184,7 +3183,7 @@ * currently running merges (in the background) complete. * We don't return until the SegmentInfos has no more * external segments. Currently this is only used by - * addIndexesNoOptimize(). */ + * addIndexes(Directory...). */ private void resolveExternalSegments() throws CorruptIndexException, IOException { boolean any = false; @@ -3253,107 +3252,68 @@ mergeScheduler.merge(this); } - /** Merges the provided indexes into this index. - *

After this completes, the index is optimized.

- *

The provided IndexReaders are not closed.

+ /** + * Merges the provided indexes into this index. This method is useful + * if you use extensions of {@link IndexReader}. Otherwise, using + * {@link #addIndexes(Directory...)} is highly recommended for performance + * reasons. That method uses the {@link MergeScheduler} and {@link MergePolicy} + * set on this writer, which may perform merges in parallel. + * + *

The provided IndexReaders are not closed. * + *

NOTE: this method does not merge the current segments, + * only the incoming ones. + * *

NOTE: while this is running, any attempts to * add or delete documents (with another thread) will be * paused until this method completes. * - *

See {@link #addIndexesNoOptimize} for - * details on transactional semantics, temporary free - * space required in the Directory, and non-CFS segments - * on an Exception.

+ *

See {@link #addIndexes(Directory...)} for details on transactional + * semantics, temporary free space required in the Directory, + * and non-CFS segments on an Exception. * *

NOTE: if this method hits an OutOfMemoryError * you should immediately close the writer. See above for details.

+ * href="#OOME">above for details. * * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ - public void addIndexes(IndexReader... readers) - throws CorruptIndexException, IOException { + public void addIndexes(IndexReader... readers) throws CorruptIndexException, IOException { ensureOpen(); // Do not allow add docs or deletes while we are running: docWriter.pauseAllThreads(); - // We must pre-acquire a read lock here (and upgrade to - // write lock in startTransaction below) so that no - // other addIndexes is allowed to start up after we have - // flushed & optimized but before we then start our - // transaction. This is because the merging below - // requires that only one segment is present in the - // index: - acquireRead(); - try { + startTransaction(false); SegmentInfo info = null; String mergedName = null; SegmentMerger merger = null; - boolean success = false; - + try { - flush(true, false, true); - optimize(); // start with zero or 1 seg - success = true; - } finally { - // Take care to release the read lock if we hit an - // exception before starting the transaction - if (!success) - releaseRead(); - } - - // true means we already have a read lock; if this - // call hits an exception it will release the write - // lock: - startTransaction(true); - - try { mergedName = newSegmentName(); merger = new SegmentMerger(this, mergedName, null); - SegmentReader sReader = null; + for (IndexReader reader : readers) // add new indexes + merger.add(reader); + + int docCount = merger.merge(); // merge 'em + synchronized(this) { - if (segmentInfos.size() == 1) { // add existing index, if any - sReader = readerPool.get(segmentInfos.info(0), true, BufferedIndexInput.BUFFER_SIZE, -1); - } + info = new SegmentInfo(mergedName, docCount, directory, false, true, + -1, null, false, merger.hasProx()); + setDiagnostics(info, "addIndexes(IndexReader...)"); + segmentInfos.add(info); } - success = false; - - try { - if (sReader != null) - merger.add(sReader); - - for (int i = 0; i < readers.length; i++) // add new indexes - merger.add(readers[i]); - - int docCount = merger.merge(); // merge 'em - - synchronized(this) { - segmentInfos.clear(); // pop old infos & add new - info = new SegmentInfo(mergedName, docCount, directory, false, true, - -1, null, false, merger.hasProx()); - setDiagnostics(info, "addIndexes(IndexReader...)"); - segmentInfos.add(info); - } - - // Notify DocumentsWriter that the flushed count just increased - docWriter.updateFlushedDocCount(docCount); - - success = true; - - } finally { - if (sReader != null) { - readerPool.release(sReader); - } - } + // Notify DocumentsWriter that the flushed count just increased + docWriter.updateFlushedDocCount(docCount); + + success = true; } finally { if (!success) { if (infoStream != null) @@ -4963,10 +4923,10 @@ * Sets the {@link PayloadProcessorProvider} to use when merging payloads. * Note that the given pcp will be invoked for every segment that * is merged, not only external ones that are given through - * {@link IndexWriter#addIndexes} or {@link IndexWriter#addIndexesNoOptimize}. - * If you want only the payloads of the external segments to be processed, you - * can return null whenever a {@link DirPayloadProcessor} is - * requested for the {@link Directory} of the {@link IndexWriter}. + * {@link #addIndexes}. 
If you want only the payloads of the external segments + * to be processed, you can return null whenever a + * {@link DirPayloadProcessor} is requested for the {@link Directory} of the + * {@link IndexWriter}. *

* The default is null which means payloads are processed * normally (copied) during segment merges. You can also unset it by passing Index: lucene/src/test/org/apache/lucene/index/TestAddIndexes.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestAddIndexes.java (revision 944220) +++ lucene/src/test/org/apache/lucene/index/TestAddIndexes.java (working copy) @@ -30,7 +30,8 @@ import org.apache.lucene.search.PhraseQuery; -public class TestAddIndexesNoOptimize extends LuceneTestCase { +public class TestAddIndexes extends LuceneTestCase { + public void testSimpleCase() throws IOException { // main directory Directory dir = new RAMDirectory(); @@ -65,7 +66,7 @@ // test doc count before segments are merged writer = newWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setOpenMode(OpenMode.APPEND)); assertEquals(100, writer.maxDoc()); - writer.addIndexesNoOptimize(new Directory[] { aux, aux2 }); + writer.addIndexes(new Directory[] { aux, aux2 }); assertEquals(190, writer.maxDoc()); writer.close(); @@ -86,7 +87,7 @@ // test doc count before segments are merged/index is optimized writer = newWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setOpenMode(OpenMode.APPEND)); assertEquals(190, writer.maxDoc()); - writer.addIndexesNoOptimize(new Directory[] { aux3 }); + writer.addIndexes(new Directory[] { aux3 }); assertEquals(230, writer.maxDoc()); writer.close(); @@ -117,7 +118,7 @@ writer = newWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setOpenMode(OpenMode.APPEND)); assertEquals(230, writer.maxDoc()); - writer.addIndexesNoOptimize(new Directory[] { aux4 }); + writer.addIndexes(new Directory[] { aux4 }); assertEquals(231, writer.maxDoc()); writer.close(); @@ -134,7 +135,7 @@ setUpDirs(dir, aux); IndexWriter writer = newWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setOpenMode(OpenMode.APPEND)); - writer.addIndexesNoOptimize(new Directory[] {aux}); + writer.addIndexes(new Directory[] {aux}); // Adds 10 docs, then replaces them with another 10 // docs, so 10 pending deletes: @@ -182,7 +183,7 @@ writer.updateDocument(new Term("id", "" + (i%10)), doc); } - writer.addIndexesNoOptimize(new Directory[] {aux}); + writer.addIndexes(new Directory[] {aux}); // Deletes one of the 10 added docs, leaving 9: PhraseQuery q = new PhraseQuery(); @@ -227,7 +228,7 @@ q.add(new Term("content", "14")); writer.deleteDocuments(q); - writer.addIndexesNoOptimize(new Directory[] {aux}); + writer.addIndexes(new Directory[] {aux}); writer.optimize(); writer.commit(); @@ -271,7 +272,7 @@ writer = newWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setOpenMode(OpenMode.APPEND)); try { // cannot add self - writer.addIndexesNoOptimize(new Directory[] { aux, dir }); + writer.addIndexes(new Directory[] { aux, dir }); assertTrue(false); } catch (IllegalArgumentException e) { @@ -284,7 +285,7 @@ } // in all the remaining tests, make the doc count of the oldest segment - // in dir large so that it is never merged in addIndexesNoOptimize() + // in dir large so that it is never merged in addIndexes() // case 1: no tail segments public void testNoTailSegments() throws IOException { // main directory @@ -300,7 +301,7 @@ ((LogMergePolicy) writer.getConfig().getMergePolicy()).setMergeFactor(4); addDocs(writer, 
10); - writer.addIndexesNoOptimize(new Directory[] { aux }); + writer.addIndexes(new Directory[] { aux }); assertEquals(1040, writer.maxDoc()); assertEquals(2, writer.getSegmentCount()); assertEquals(1000, writer.getDocCount(0)); @@ -323,7 +324,7 @@ ((LogMergePolicy) writer.getConfig().getMergePolicy()).setMergeFactor(4); addDocs(writer, 2); - writer.addIndexesNoOptimize(new Directory[] { aux }); + writer.addIndexes(new Directory[] { aux }); assertEquals(1032, writer.maxDoc()); assertEquals(2, writer.getSegmentCount()); assertEquals(1000, writer.getDocCount(0)); @@ -347,7 +348,7 @@ .setOpenMode(OpenMode.APPEND).setMaxBufferedDocs(10)); ((LogMergePolicy) writer.getConfig().getMergePolicy()).setMergeFactor(4); - writer.addIndexesNoOptimize(new Directory[] { aux, new RAMDirectory(aux) }); + writer.addIndexes(new Directory[] { aux, new RAMDirectory(aux) }); assertEquals(1060, writer.maxDoc()); assertEquals(1000, writer.getDocCount(0)); writer.close(); @@ -377,7 +378,7 @@ .setOpenMode(OpenMode.APPEND).setMaxBufferedDocs(4)); ((LogMergePolicy) writer.getConfig().getMergePolicy()).setMergeFactor(4); - writer.addIndexesNoOptimize(new Directory[] { aux, new RAMDirectory(aux) }); + writer.addIndexes(new Directory[] { aux, new RAMDirectory(aux) }); assertEquals(1020, writer.maxDoc()); assertEquals(1000, writer.getDocCount(0)); writer.close(); @@ -400,7 +401,7 @@ TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)) .setOpenMode(OpenMode.CREATE).setMaxBufferedDocs(100)); ((LogMergePolicy) writer.getConfig().getMergePolicy()).setMergeFactor(10); - writer.addIndexesNoOptimize(new Directory[] { aux }); + writer.addIndexes(new Directory[] { aux }); assertEquals(30, writer.maxDoc()); assertEquals(3, writer.getSegmentCount()); writer.close(); @@ -423,7 +424,7 @@ .setOpenMode(OpenMode.APPEND).setMaxBufferedDocs(6)); ((LogMergePolicy) writer.getConfig().getMergePolicy()).setMergeFactor(4); - writer.addIndexesNoOptimize(new Directory[] { aux, aux2 }); + writer.addIndexes(new Directory[] { aux, aux2 }); assertEquals(1025, writer.maxDoc()); assertEquals(1000, writer.getDocCount(0)); writer.close(); @@ -543,7 +544,7 @@ writer = new IndexWriter(dir2, new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)) .setMergeScheduler(new SerialMergeScheduler()).setMergePolicy(lmp)); - writer.addIndexesNoOptimize(new Directory[] {dir}); + writer.addIndexes(new Directory[] {dir}); writer.close(); dir.close(); dir2.close(); @@ -563,7 +564,7 @@ writer = newWriter(other, new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT))); ((LogMergePolicy) writer.getConfig().getMergePolicy()).setUseCompoundFile(true); ((LogMergePolicy) writer.getConfig().getMergePolicy()).setUseCompoundDocStore(true); - writer.addIndexesNoOptimize(new Directory[] {dir}); + writer.addIndexes(new Directory[] {dir}); assertTrue(writer.newestSegment().getUseCompoundFile()); writer.close(); } Index: lucene/src/test/org/apache/lucene/index/TestAddIndexesNoOptimize.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestAddIndexesNoOptimize.java (revision 944220) +++ lucene/src/test/org/apache/lucene/index/TestAddIndexesNoOptimize.java (working copy) @@ -1,570 +0,0 @@ -package org.apache.lucene.index; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; - -import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.analysis.WhitespaceAnalyzer; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.index.IndexWriterConfig.OpenMode; -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.RAMDirectory; -import org.apache.lucene.store.MockRAMDirectory; - -import org.apache.lucene.search.PhraseQuery; - -public class TestAddIndexesNoOptimize extends LuceneTestCase { - public void testSimpleCase() throws IOException { - // main directory - Directory dir = new RAMDirectory(); - // two auxiliary directories - Directory aux = new RAMDirectory(); - Directory aux2 = new RAMDirectory(); - - IndexWriter writer = null; - - writer = newWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, - new WhitespaceAnalyzer(TEST_VERSION_CURRENT)) - .setOpenMode(OpenMode.CREATE)); - // add 100 documents - addDocs(writer, 100); - assertEquals(100, writer.maxDoc()); - writer.close(); - - writer = newWriter(aux, new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setOpenMode(OpenMode.CREATE)); - ((LogMergePolicy) writer.getConfig().getMergePolicy()).setUseCompoundFile(false); // use one without a compound file - ((LogMergePolicy) writer.getConfig().getMergePolicy()).setUseCompoundDocStore(false); // use one without a compound file - // add 40 documents in separate files - addDocs(writer, 40); - assertEquals(40, writer.maxDoc()); - writer.close(); - - writer = newWriter(aux2, new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setOpenMode(OpenMode.CREATE)); - // add 40 documents in compound files - addDocs2(writer, 50); - assertEquals(50, writer.maxDoc()); - writer.close(); - - // test doc count before segments are merged - writer = newWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setOpenMode(OpenMode.APPEND)); - assertEquals(100, writer.maxDoc()); - writer.addIndexesNoOptimize(new Directory[] { aux, aux2 }); - assertEquals(190, writer.maxDoc()); - writer.close(); - - // make sure the old index is correct - verifyNumDocs(aux, 40); - - // make sure the new index is correct - verifyNumDocs(dir, 190); - - // now add another set in. 
- Directory aux3 = new RAMDirectory(); - writer = newWriter(aux3, new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT))); - // add 40 documents - addDocs(writer, 40); - assertEquals(40, writer.maxDoc()); - writer.close(); - - // test doc count before segments are merged/index is optimized - writer = newWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setOpenMode(OpenMode.APPEND)); - assertEquals(190, writer.maxDoc()); - writer.addIndexesNoOptimize(new Directory[] { aux3 }); - assertEquals(230, writer.maxDoc()); - writer.close(); - - // make sure the new index is correct - verifyNumDocs(dir, 230); - - verifyTermDocs(dir, new Term("content", "aaa"), 180); - - verifyTermDocs(dir, new Term("content", "bbb"), 50); - - // now optimize it. - writer = newWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setOpenMode(OpenMode.APPEND)); - writer.optimize(); - writer.close(); - - // make sure the new index is correct - verifyNumDocs(dir, 230); - - verifyTermDocs(dir, new Term("content", "aaa"), 180); - - verifyTermDocs(dir, new Term("content", "bbb"), 50); - - // now add a single document - Directory aux4 = new RAMDirectory(); - writer = newWriter(aux4, new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT))); - addDocs2(writer, 1); - writer.close(); - - writer = newWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setOpenMode(OpenMode.APPEND)); - assertEquals(230, writer.maxDoc()); - writer.addIndexesNoOptimize(new Directory[] { aux4 }); - assertEquals(231, writer.maxDoc()); - writer.close(); - - verifyNumDocs(dir, 231); - - verifyTermDocs(dir, new Term("content", "bbb"), 51); - } - - public void testWithPendingDeletes() throws IOException { - // main directory - Directory dir = new RAMDirectory(); - // auxiliary directory - Directory aux = new RAMDirectory(); - - setUpDirs(dir, aux); - IndexWriter writer = newWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setOpenMode(OpenMode.APPEND)); - writer.addIndexesNoOptimize(new Directory[] {aux}); - - // Adds 10 docs, then replaces them with another 10 - // docs, so 10 pending deletes: - for (int i = 0; i < 20; i++) { - Document doc = new Document(); - doc.add(new Field("id", "" + (i % 10), Field.Store.NO, Field.Index.NOT_ANALYZED)); - doc.add(new Field("content", "bbb " + i, Field.Store.NO, - Field.Index.ANALYZED)); - writer.updateDocument(new Term("id", "" + (i%10)), doc); - } - // Deletes one of the 10 added docs, leaving 9: - PhraseQuery q = new PhraseQuery(); - q.add(new Term("content", "bbb")); - q.add(new Term("content", "14")); - writer.deleteDocuments(q); - - writer.optimize(); - writer.commit(); - - verifyNumDocs(dir, 1039); - verifyTermDocs(dir, new Term("content", "aaa"), 1030); - verifyTermDocs(dir, new Term("content", "bbb"), 9); - - writer.close(); - dir.close(); - aux.close(); - } - - public void testWithPendingDeletes2() throws IOException { - // main directory - Directory dir = new RAMDirectory(); - // auxiliary directory - Directory aux = new RAMDirectory(); - - setUpDirs(dir, aux); - IndexWriter writer = newWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setOpenMode(OpenMode.APPEND)); - - // Adds 10 docs, then replaces them with another 10 - // docs, so 10 pending deletes: - for (int i = 0; i < 20; i++) { - 
Document doc = new Document(); - doc.add(new Field("id", "" + (i % 10), Field.Store.NO, Field.Index.NOT_ANALYZED)); - doc.add(new Field("content", "bbb " + i, Field.Store.NO, - Field.Index.ANALYZED)); - writer.updateDocument(new Term("id", "" + (i%10)), doc); - } - - writer.addIndexesNoOptimize(new Directory[] {aux}); - - // Deletes one of the 10 added docs, leaving 9: - PhraseQuery q = new PhraseQuery(); - q.add(new Term("content", "bbb")); - q.add(new Term("content", "14")); - writer.deleteDocuments(q); - - writer.optimize(); - writer.commit(); - - verifyNumDocs(dir, 1039); - verifyTermDocs(dir, new Term("content", "aaa"), 1030); - verifyTermDocs(dir, new Term("content", "bbb"), 9); - - writer.close(); - dir.close(); - aux.close(); - } - - public void testWithPendingDeletes3() throws IOException { - // main directory - Directory dir = new RAMDirectory(); - // auxiliary directory - Directory aux = new RAMDirectory(); - - setUpDirs(dir, aux); - IndexWriter writer = newWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setOpenMode(OpenMode.APPEND)); - - // Adds 10 docs, then replaces them with another 10 - // docs, so 10 pending deletes: - for (int i = 0; i < 20; i++) { - Document doc = new Document(); - doc.add(new Field("id", "" + (i % 10), Field.Store.NO, Field.Index.NOT_ANALYZED)); - doc.add(new Field("content", "bbb " + i, Field.Store.NO, - Field.Index.ANALYZED)); - writer.updateDocument(new Term("id", "" + (i%10)), doc); - } - - // Deletes one of the 10 added docs, leaving 9: - PhraseQuery q = new PhraseQuery(); - q.add(new Term("content", "bbb")); - q.add(new Term("content", "14")); - writer.deleteDocuments(q); - - writer.addIndexesNoOptimize(new Directory[] {aux}); - - writer.optimize(); - writer.commit(); - - verifyNumDocs(dir, 1039); - verifyTermDocs(dir, new Term("content", "aaa"), 1030); - verifyTermDocs(dir, new Term("content", "bbb"), 9); - - writer.close(); - dir.close(); - aux.close(); - } - - // case 0: add self or exceed maxMergeDocs, expect exception - public void testAddSelf() throws IOException { - // main directory - Directory dir = new RAMDirectory(); - // auxiliary directory - Directory aux = new RAMDirectory(); - - IndexWriter writer = null; - - writer = newWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT))); - // add 100 documents - addDocs(writer, 100); - assertEquals(100, writer.maxDoc()); - writer.close(); - - writer = newWriter(aux, new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setOpenMode(OpenMode.CREATE).setMaxBufferedDocs(1000)); - ((LogMergePolicy) writer.getConfig().getMergePolicy()).setUseCompoundFile(false); // use one without a compound file - ((LogMergePolicy) writer.getConfig().getMergePolicy()).setUseCompoundDocStore(false); // use one without a compound file - // add 140 documents in separate files - addDocs(writer, 40); - writer.close(); - writer = newWriter(aux, new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setOpenMode(OpenMode.CREATE).setMaxBufferedDocs(1000)); - ((LogMergePolicy) writer.getConfig().getMergePolicy()).setUseCompoundFile(false); // use one without a compound file - ((LogMergePolicy) writer.getConfig().getMergePolicy()).setUseCompoundDocStore(false); // use one without a compound file - addDocs(writer, 100); - writer.close(); - - writer = newWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new 
WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setOpenMode(OpenMode.APPEND)); - try { - // cannot add self - writer.addIndexesNoOptimize(new Directory[] { aux, dir }); - assertTrue(false); - } - catch (IllegalArgumentException e) { - assertEquals(100, writer.maxDoc()); - } - writer.close(); - - // make sure the index is correct - verifyNumDocs(dir, 100); - } - - // in all the remaining tests, make the doc count of the oldest segment - // in dir large so that it is never merged in addIndexesNoOptimize() - // case 1: no tail segments - public void testNoTailSegments() throws IOException { - // main directory - Directory dir = new RAMDirectory(); - // auxiliary directory - Directory aux = new RAMDirectory(); - - setUpDirs(dir, aux); - - IndexWriter writer = newWriter(dir, new IndexWriterConfig( - TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)) - .setOpenMode(OpenMode.APPEND).setMaxBufferedDocs(10)); - ((LogMergePolicy) writer.getConfig().getMergePolicy()).setMergeFactor(4); - addDocs(writer, 10); - - writer.addIndexesNoOptimize(new Directory[] { aux }); - assertEquals(1040, writer.maxDoc()); - assertEquals(2, writer.getSegmentCount()); - assertEquals(1000, writer.getDocCount(0)); - writer.close(); - - // make sure the index is correct - verifyNumDocs(dir, 1040); - } - - // case 2: tail segments, invariants hold, no copy - public void testNoCopySegments() throws IOException { - // main directory - Directory dir = new RAMDirectory(); - // auxiliary directory - Directory aux = new RAMDirectory(); - - setUpDirs(dir, aux); - - IndexWriter writer = newWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setOpenMode(OpenMode.APPEND).setMaxBufferedDocs(9)); - ((LogMergePolicy) writer.getConfig().getMergePolicy()).setMergeFactor(4); - addDocs(writer, 2); - - writer.addIndexesNoOptimize(new Directory[] { aux }); - assertEquals(1032, writer.maxDoc()); - assertEquals(2, writer.getSegmentCount()); - assertEquals(1000, writer.getDocCount(0)); - writer.close(); - - // make sure the index is correct - verifyNumDocs(dir, 1032); - } - - // case 3: tail segments, invariants hold, copy, invariants hold - public void testNoMergeAfterCopy() throws IOException { - // main directory - Directory dir = new RAMDirectory(); - // auxiliary directory - Directory aux = new RAMDirectory(); - - setUpDirs(dir, aux); - - IndexWriter writer = newWriter(dir, new IndexWriterConfig( - TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)) - .setOpenMode(OpenMode.APPEND).setMaxBufferedDocs(10)); - ((LogMergePolicy) writer.getConfig().getMergePolicy()).setMergeFactor(4); - - writer.addIndexesNoOptimize(new Directory[] { aux, new RAMDirectory(aux) }); - assertEquals(1060, writer.maxDoc()); - assertEquals(1000, writer.getDocCount(0)); - writer.close(); - - // make sure the index is correct - verifyNumDocs(dir, 1060); - } - - // case 4: tail segments, invariants hold, copy, invariants not hold - public void testMergeAfterCopy() throws IOException { - // main directory - Directory dir = new RAMDirectory(); - // auxiliary directory - Directory aux = new RAMDirectory(); - - setUpDirs(dir, aux); - - IndexReader reader = IndexReader.open(aux, false); - for (int i = 0; i < 20; i++) { - reader.deleteDocument(i); - } - assertEquals(10, reader.numDocs()); - reader.close(); - - IndexWriter writer = newWriter(dir, new IndexWriterConfig( - TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)) - .setOpenMode(OpenMode.APPEND).setMaxBufferedDocs(4)); - 
((LogMergePolicy) writer.getConfig().getMergePolicy()).setMergeFactor(4); - - writer.addIndexesNoOptimize(new Directory[] { aux, new RAMDirectory(aux) }); - assertEquals(1020, writer.maxDoc()); - assertEquals(1000, writer.getDocCount(0)); - writer.close(); - - // make sure the index is correct - verifyNumDocs(dir, 1020); - } - - // case 5: tail segments, invariants not hold - public void testMoreMerges() throws IOException { - // main directory - Directory dir = new RAMDirectory(); - // auxiliary directory - Directory aux = new RAMDirectory(); - Directory aux2 = new RAMDirectory(); - - setUpDirs(dir, aux); - - IndexWriter writer = newWriter(aux2, new IndexWriterConfig( - TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)) - .setOpenMode(OpenMode.CREATE).setMaxBufferedDocs(100)); - ((LogMergePolicy) writer.getConfig().getMergePolicy()).setMergeFactor(10); - writer.addIndexesNoOptimize(new Directory[] { aux }); - assertEquals(30, writer.maxDoc()); - assertEquals(3, writer.getSegmentCount()); - writer.close(); - - IndexReader reader = IndexReader.open(aux, false); - for (int i = 0; i < 27; i++) { - reader.deleteDocument(i); - } - assertEquals(3, reader.numDocs()); - reader.close(); - - reader = IndexReader.open(aux2, false); - for (int i = 0; i < 8; i++) { - reader.deleteDocument(i); - } - assertEquals(22, reader.numDocs()); - reader.close(); - - writer = newWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)) - .setOpenMode(OpenMode.APPEND).setMaxBufferedDocs(6)); - ((LogMergePolicy) writer.getConfig().getMergePolicy()).setMergeFactor(4); - - writer.addIndexesNoOptimize(new Directory[] { aux, aux2 }); - assertEquals(1025, writer.maxDoc()); - assertEquals(1000, writer.getDocCount(0)); - writer.close(); - - // make sure the index is correct - verifyNumDocs(dir, 1025); - } - - private IndexWriter newWriter(Directory dir, IndexWriterConfig conf) - throws IOException { - conf.setMergePolicy(new LogDocMergePolicy()); - final IndexWriter writer = new IndexWriter(dir, conf); - return writer; - } - - private void addDocs(IndexWriter writer, int numDocs) throws IOException { - for (int i = 0; i < numDocs; i++) { - Document doc = new Document(); - doc.add(new Field("content", "aaa", Field.Store.NO, - Field.Index.ANALYZED)); - writer.addDocument(doc); - } - } - - private void addDocs2(IndexWriter writer, int numDocs) throws IOException { - for (int i = 0; i < numDocs; i++) { - Document doc = new Document(); - doc.add(new Field("content", "bbb", Field.Store.NO, - Field.Index.ANALYZED)); - writer.addDocument(doc); - } - } - - private void verifyNumDocs(Directory dir, int numDocs) throws IOException { - IndexReader reader = IndexReader.open(dir, true); - assertEquals(numDocs, reader.maxDoc()); - assertEquals(numDocs, reader.numDocs()); - reader.close(); - } - - private void verifyTermDocs(Directory dir, Term term, int numDocs) - throws IOException { - IndexReader reader = IndexReader.open(dir, true); - TermDocs termDocs = reader.termDocs(term); - int count = 0; - while (termDocs.next()) - count++; - assertEquals(numDocs, count); - reader.close(); - } - - private void setUpDirs(Directory dir, Directory aux) throws IOException { - IndexWriter writer = null; - - writer = newWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setOpenMode(OpenMode.CREATE).setMaxBufferedDocs(1000)); - // add 1000 documents in 1 segment - addDocs(writer, 1000); - assertEquals(1000, writer.maxDoc()); - assertEquals(1, 
writer.getSegmentCount()); - writer.close(); - - writer = newWriter(aux, new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setOpenMode(OpenMode.CREATE).setMaxBufferedDocs(100)); - ((LogMergePolicy) writer.getConfig().getMergePolicy()).setUseCompoundFile(false); // use one without a compound file - ((LogMergePolicy) writer.getConfig().getMergePolicy()).setUseCompoundDocStore(false); // use one without a compound file - ((LogMergePolicy) writer.getConfig().getMergePolicy()).setMergeFactor(10); - // add 30 documents in 3 segments - for (int i = 0; i < 3; i++) { - addDocs(writer, 10); - writer.close(); - writer = newWriter(aux, new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setOpenMode(OpenMode.APPEND).setMaxBufferedDocs(100)); - ((LogMergePolicy) writer.getConfig().getMergePolicy()).setUseCompoundFile(false); // use one without a compound file - ((LogMergePolicy) writer.getConfig().getMergePolicy()).setUseCompoundDocStore(false); // use one without a compound file - ((LogMergePolicy) writer.getConfig().getMergePolicy()).setMergeFactor(10); - } - assertEquals(30, writer.maxDoc()); - assertEquals(3, writer.getSegmentCount()); - writer.close(); - } - - // LUCENE-1270 - public void testHangOnClose() throws IOException { - - Directory dir = new MockRAMDirectory(); - LogByteSizeMergePolicy lmp = new LogByteSizeMergePolicy(); - lmp.setUseCompoundFile(false); - lmp.setUseCompoundDocStore(false); - lmp.setMergeFactor(100); - IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig( - TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)) - .setMaxBufferedDocs(5).setMergePolicy(lmp)); - - Document doc = new Document(); - doc.add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, - Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); - for(int i=0;i<60;i++) - writer.addDocument(doc); - - Document doc2 = new Document(); - doc2.add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, - Field.Index.NO)); - doc2.add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, - Field.Index.NO)); - doc2.add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, - Field.Index.NO)); - doc2.add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, - Field.Index.NO)); - for(int i=0;i<10;i++) - writer.addDocument(doc2); - writer.close(); - - Directory dir2 = new MockRAMDirectory(); - lmp = new LogByteSizeMergePolicy(); - lmp.setMinMergeMB(0.0001); - lmp.setUseCompoundFile(false); - lmp.setUseCompoundDocStore(false); - lmp.setMergeFactor(4); - writer = new IndexWriter(dir2, new IndexWriterConfig(TEST_VERSION_CURRENT, - new WhitespaceAnalyzer(TEST_VERSION_CURRENT)) - .setMergeScheduler(new SerialMergeScheduler()).setMergePolicy(lmp)); - writer.addIndexesNoOptimize(new Directory[] {dir}); - writer.close(); - dir.close(); - dir2.close(); - } - - // LUCENE-1642: make sure CFS of destination indexwriter - // is respected when copying tail segments - public void testTargetCFS() throws IOException { - Directory dir = new RAMDirectory(); - IndexWriter writer = newWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT))); - ((LogMergePolicy) writer.getConfig().getMergePolicy()).setUseCompoundFile(false); - ((LogMergePolicy) writer.getConfig().getMergePolicy()).setUseCompoundDocStore(false); - addDocs(writer, 1); - writer.close(); - - Directory other = new 
RAMDirectory(); - writer = newWriter(other, new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT))); - ((LogMergePolicy) writer.getConfig().getMergePolicy()).setUseCompoundFile(true); - ((LogMergePolicy) writer.getConfig().getMergePolicy()).setUseCompoundDocStore(true); - writer.addIndexesNoOptimize(new Directory[] {dir}); - assertTrue(writer.newestSegment().getUseCompoundFile()); - writer.close(); - } -} Index: lucene/src/test/org/apache/lucene/index/TestIndexReaderCloneNorms.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestIndexReaderCloneNorms.java (revision 944220) +++ lucene/src/test/org/apache/lucene/index/TestIndexReaderCloneNorms.java (working copy) @@ -117,7 +117,7 @@ TEST_VERSION_CURRENT, anlzr).setOpenMode(OpenMode.APPEND) .setMaxBufferedDocs(5)); ((LogMergePolicy) iw.getConfig().getMergePolicy()).setMergeFactor(3); - iw.addIndexesNoOptimize(new Directory[] { dir1, dir2 }); + iw.addIndexes(new Directory[] { dir1, dir2 }); iw.optimize(); iw.close(); Index: lucene/src/test/org/apache/lucene/index/TestIndexWriter.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestIndexWriter.java (revision 944220) +++ lucene/src/test/org/apache/lucene/index/TestIndexWriter.java (working copy) @@ -159,7 +159,7 @@ /* Test: make sure when we run out of disk space or hit - random IOExceptions in any of the addIndexesNoOptimize(*) calls + random IOExceptions in any of the addIndexes(*) calls that 1) index is not corrupt (searcher can open/search it) and 2) transactional semantics are followed: either all or none of the incoming documents were in @@ -172,7 +172,7 @@ int END_COUNT = START_COUNT + NUM_DIR*25; // Build up a bunch of dirs that have indexes which we - // will then merge together by calling addIndexesNoOptimize(*): + // will then merge together by calling addIndexes(*): Directory[] dirs = new Directory[NUM_DIR]; long inputDiskUsage = 0; for(int i=0;i 0) { openWriter(); - writer.addIndexesNoOptimize(dirs); + writer.addIndexes(dirs); rc = 1; } else { rc = 0;
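
For reference, a minimal usage sketch of the renamed Directory-based API. This is not part of the patch; the class name, field name, Version constant, and the use of RAMDirectory/WhitespaceAnalyzer are illustrative choices made only to keep the example self-contained and runnable.

import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

public class AddIndexesExample {

  // Builds a tiny one-document index in dir so the merge below has a segment to copy.
  static void fill(Directory dir, String text) throws Exception {
    IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(
        Version.LUCENE_30, new WhitespaceAnalyzer(Version.LUCENE_30)));
    Document doc = new Document();
    doc.add(new Field("content", text, Field.Store.NO, Field.Index.ANALYZED));
    w.addDocument(doc);
    w.close();
  }

  public static void main(String[] args) throws Exception {
    Directory target = new RAMDirectory();
    Directory src1 = new RAMDirectory();
    Directory src2 = new RAMDirectory();
    fill(src1, "aaa");
    fill(src2, "bbb");

    IndexWriter writer = new IndexWriter(target, new IndexWriterConfig(
        Version.LUCENE_30, new WhitespaceAnalyzer(Version.LUCENE_30)));

    // Renamed method: copies the segments of src1 and src2 into the target
    // index without optimizing it first and without merging the segments
    // that were already there.
    writer.addIndexes(src1, src2);

    // If a single-segment index is still required, optimize explicitly,
    // as IndexMergeTool does in this patch.
    writer.optimize();

    writer.close();
  }
}

The IndexReader-based addIndexes(IndexReader...) overload, per the updated javadoc, merges only the incoming readers into a single new segment, leaves the segments already in the target untouched, and does not close the provided readers.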