Index: src/test/org/apache/lucene/index/TestIndexWriter.java
===================================================================
74a75,264
>
>   private void addDoc2(IndexWriter writer) throws IOException
>   {
>     Document doc = new Document();
>     doc.add(new Field("content", "bbb", Field.Store.NO, Field.Index.TOKENIZED));
>     writer.addDocument(doc);
>   }
>
>   public void testAddIndexes() throws IOException
>   {
>     Directory dir = new RAMDirectory();
>
>     Directory aux = new RAMDirectory();
>
>     IndexWriter writer = null;
>     IndexReader reader = null;
>     int i;
>
>     writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
>     // add 100 documents
>     for (i = 0; i < 100; i++) {
>       addDoc(writer);
>     }
>     assertEquals(100, writer.docCount());
>     writer.close();
>
>     writer = new IndexWriter(aux, new WhitespaceAnalyzer(), true);
>     // add 40 documents
>     for (i = 0; i < 40; i++) {
>       addDoc(writer);
>     }
>     assertEquals(40, writer.docCount());
>     writer.close();
>
>     // test doc count before segments are merged/index is optimized
>     writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false);
>     assertEquals(100, writer.docCount());
>
>     writer.addIndexes(new Directory[] {aux});
>     assertEquals(140, writer.docCount());
>     writer.close();
>
>     // Make sure the old index is correct
>     reader = IndexReader.open(aux);
>     assertEquals(40, reader.maxDoc());
>     assertEquals(40, reader.numDocs());
>     reader.close();
>
>     // Make sure the new index is correct
>     reader = IndexReader.open(dir);
>     assertEquals(140, reader.maxDoc());
>     assertEquals(140, reader.numDocs());
>     reader.close();
>   }
>
>   public void testAddIndexesNoOpt() throws IOException
>   {
>     // Main directory
>     Directory dir = new RAMDirectory();
>     // Two auxiliary directories
>     Directory aux = new RAMDirectory();
>     Directory aux2 = new RAMDirectory();
>
>     IndexWriter writer = null;
>     IndexReader reader = null;
>     int i;
>
>     writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
>     // add 100 documents
>     for (i = 0; i < 100; i++) {
>       addDoc(writer);
>     }
>     assertEquals(100, writer.docCount());
>     writer.close();
>
>     writer = new IndexWriter(aux, new WhitespaceAnalyzer(), true);
>     writer.setUseCompoundFile(false); // use an index without compound files
>     // add 40 documents in separate files
>     for (i = 0; i < 40; i++) {
>       addDoc(writer);
>     }
>     assertEquals(40, writer.docCount());
>     writer.close();
>
>     writer = new IndexWriter(aux2, new WhitespaceAnalyzer(), true);
>     // add 50 documents in compound files
>     for (i = 0; i < 50; i++) {
>       addDoc2(writer);
>     }
>     assertEquals(50, writer.docCount());
>     writer.close();
>
>     // test doc count before segments are merged
>     writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false);
>     assertEquals(100, writer.docCount());
>
>     writer.addIndexesNoOpt(new Directory[] {aux, aux2});
>     assertEquals(190, writer.docCount());
>     writer.close();
>
>     // Make sure the old index is correct
>     reader = IndexReader.open(aux);
>     assertEquals(40, reader.maxDoc());
>     assertEquals(40, reader.numDocs());
>     reader.close();
>
>     // Make sure the new index is correct
>     reader = IndexReader.open(dir);
>     assertEquals(190, reader.maxDoc());
>     assertEquals(190, reader.numDocs());
>     reader.close();
>
>     // OK, now add another set in.
>     Directory aux3 = new RAMDirectory();
>
>     writer = new IndexWriter(aux3, new WhitespaceAnalyzer(), true);
>     // add 40 documents
>     for (i = 0; i < 40; i++) {
>       addDoc(writer);
>     }
>     assertEquals(40, writer.docCount());
>     writer.close();
>
>     // test doc count before segments are merged/index is optimized
>     writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false);
>     assertEquals(190, writer.docCount());
>     writer.addIndexesNoOpt(new Directory[] {aux3});
>     assertEquals(230, writer.docCount());
>     writer.close();
>
>     // Make sure the new index is correct; the reader stays open for the
>     // term-docs checks below
>     reader = IndexReader.open(dir);
>     assertEquals(230, reader.maxDoc());
>     assertEquals(230, reader.numDocs());
>
>     TermDocs tDocs = reader.termDocs(new Term("content", "aaa"));
>     int count = 0;
>     while (tDocs.next()) count++;
>     assertEquals(180, count);
>
>     tDocs = reader.termDocs(new Term("content", "bbb"));
>     count = 0;
>     while (tDocs.next()) count++;
>     assertEquals(50, count);
>     reader.close();
>
>     // Now optimize it.
>     writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false);
>     writer.optimize();
>     writer.close();
>
>     // Make sure the optimized index is correct
>     reader = IndexReader.open(dir);
>     assertEquals(230, reader.maxDoc());
>     assertEquals(230, reader.numDocs());
>
>     tDocs = reader.termDocs(new Term("content", "aaa"));
>     count = 0;
>     while (tDocs.next()) count++;
>     assertEquals(180, count);
>
>     tDocs = reader.termDocs(new Term("content", "bbb"));
>     count = 0;
>     while (tDocs.next()) count++;
>     assertEquals(50, count);
>     reader.close();
>
>     // Now add a single document
>     Directory aux4 = new RAMDirectory();
>     writer = new IndexWriter(aux4, new WhitespaceAnalyzer(), true);
>     addDoc2(writer);
>     writer.close();
>
>     writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false);
>     assertEquals(230, writer.docCount());
>     writer.addIndexesNoOpt(new Directory[] {aux4});
>     assertEquals(231, writer.docCount());
>     writer.close();
>
>     reader = IndexReader.open(dir);
>     assertEquals(231, reader.maxDoc());
>     assertEquals(231, reader.numDocs());
>
>     tDocs = reader.termDocs(new Term("content", "bbb"));
>     count = 0;
>     while (tDocs.next()) count++;
>     assertEquals(51, count);
>     reader.close();
>   }
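Both tests also call an addDoc helper that this patch does not touch; it is assumed to already exist in TestIndexWriter.java. Judging from the assertions that count 180 hits for content:"aaa" (100 + 40 + 40 documents), it presumably mirrors addDoc2 above, roughly like this sketch (not part of the diff):

    // Assumed pre-existing helper: indexes one document with content "aaa".
    private void addDoc(IndexWriter writer) throws IOException
    {
      Document doc = new Document();
      doc.add(new Field("content", "aaa", Field.Store.NO, Field.Index.TOKENIZED));
      writer.addDocument(doc);
    }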
Index: src/java/org/apache/lucene/index/IndexWriter.java
===================================================================
556a556,604
>   /**
>    * Merges all segments from an array of indexes into this index, without
>    * optimizing the index.  It does this by adding the segments of each
>    * index to this index's segment list and then merging them into the
>    * current index.
>    *
>    * <p>This may be used to parallelize batch indexing.  A large document
>    * collection can be broken into sub-collections.  Each sub-collection
>    * can be indexed in parallel, on a different thread, process or machine.
>    * The complete index can then be created by merging sub-collection
>    * indexes with this method.
>    *
>    * <p>After this completes, the index is unoptimized, but the indexes
>    * from each directory passed in will have been merged into one segment
>    * before being added to the main index.
>    */
>   public synchronized void addIndexesNoOpt(Directory[] dirs)
>       throws IOException {
>
>     int curDocCount = docCount();     // documents currently in the index
>     int addedDocs = 0;                // documents added but not yet merged in
>     int start = segmentInfos.size();  // position where segments from other directories are added
>
>     for (int i = 0; i < dirs.length; i++) {
>       SegmentInfos sis = new SegmentInfos();  // read infos from dir
>       sis.read(dirs[i]);
>
>       for (int j = 0; j < sis.size(); j++) {
>         SegmentInfo info = sis.info(j);
>         segmentInfos.addElement(info);  // add each info
>         addedDocs += info.docCount;     // keep track of the size
>       }
>
>       // If we've grown the index by half or more, merge the new segments now
>       if (addedDocs * 2 > curDocCount && curDocCount > 0) {
>         mergeSegments(start);
>         curDocCount = docCount();
>         addedDocs = 0;
>         start = segmentInfos.size();
>       }
>     }
>
>     // Merge in all segments not yet in the index.
>     mergeSegments(start);
>
>     // Make sure the segment counts obey the merge factor.
>     maybeMergeSegments();
>   }
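For context, a minimal sketch of the parallel batch-indexing pattern the javadoc describes. The two-part split and directory choices are illustrative assumptions, and the snippet assumes the same imports as TestIndexWriter.java above:

    // Each sub-collection is indexed independently (in practice on a
    // separate thread, process or machine), then merged into the main
    // index without an optimize.
    Directory main = new RAMDirectory();
    Directory[] parts = { new RAMDirectory(), new RAMDirectory() };

    for (int p = 0; p < parts.length; p++) {
      IndexWriter partWriter = new IndexWriter(parts[p], new WhitespaceAnalyzer(), true);
      // ... addDocument() calls for this sub-collection ...
      partWriter.close();
    }

    IndexWriter writer = new IndexWriter(main, new WhitespaceAnalyzer(), true);
    writer.addIndexesNoOpt(parts);  // merges the parts in; no optimize()
    writer.close();

Unlike addIndexes(Directory[]), this leaves the combined index unoptimized; a later optimize() call, as exercised in testAddIndexesNoOpt above, behaves the same as on any other index.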