Index: lucene/src/test/org/apache/lucene/index/TestIndexWriter.java
===================================================================
--- lucene/src/test/org/apache/lucene/index/TestIndexWriter.java	(revision 1041020)
+++ lucene/src/test/org/apache/lucene/index/TestIndexWriter.java	(working copy)
@@ -2479,10 +2479,14 @@
   public void testDeleteUnusedFiles() throws Exception {
     for(int iter=0;iter<2;iter++) {
       Directory dir = newDirectory();
+
+      LogMergePolicy mergePolicy = newLogMergePolicy(true);
+      mergePolicy.setNoCFSRatio(1); // This test expects all of its segments to be in CFS
+
       IndexWriter w = new IndexWriter(
           dir,
           newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).
-              setMergePolicy(newLogMergePolicy(true))
+              setMergePolicy(mergePolicy)
       );
       Document doc = new Document();
       doc.add(newField("field", "go", Field.Store.NO, Field.Index.ANALYZED));
Index: lucene/src/test/org/apache/lucene/index/TestIndexFileDeleter.java
===================================================================
--- lucene/src/test/org/apache/lucene/index/TestIndexFileDeleter.java	(revision 1041020)
+++ lucene/src/test/org/apache/lucene/index/TestIndexFileDeleter.java	(working copy)
@@ -40,18 +40,23 @@
   public void testDeleteLeftoverFiles() throws IOException {
     MockDirectoryWrapper dir = newDirectory();
     dir.setPreventDoubleWrite(false);
+
+    LogMergePolicy mergePolicy = newLogMergePolicy(true, 10);
+    mergePolicy.setNoCFSRatio(1); // This test expects all of its segments to be in CFS
+
     IndexWriter writer = new IndexWriter(
         dir,
         newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).
            setMaxBufferedDocs(10).
-           setMergePolicy(newLogMergePolicy(true, 10))
+           setMergePolicy(mergePolicy)
     );
+
     int i;
     for(i=0;i<35;i++) {
       addDoc(writer, i);
     }
-    ((LogMergePolicy) writer.getConfig().getMergePolicy()).setUseCompoundFile(false);
-    ((LogMergePolicy) writer.getConfig().getMergePolicy()).setUseCompoundDocStore(false);
+    mergePolicy.setUseCompoundFile(false);
+    mergePolicy.setUseCompoundDocStore(false);
     for(;i<45;i++) {
       addDoc(writer, i);
     }
Index: lucene/src/test/org/apache/lucene/index/TestAddIndexes.java
===================================================================
--- lucene/src/test/org/apache/lucene/index/TestAddIndexes.java	(revision 1041020)
+++ lucene/src/test/org/apache/lucene/index/TestAddIndexes.java	(working copy)
@@ -24,6 +24,9 @@
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
+import org.apache.lucene.document.Field.Index;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.document.Field.TermVector;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
 import org.apache.lucene.index.codecs.CodecProvider;
 import org.apache.lucene.index.codecs.mocksep.MockSepCodec;
@@ -36,6 +39,7 @@
 import org.apache.lucene.store.MockDirectoryWrapper;
 import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.Version;
 import org.apache.lucene.util._TestUtil;
 
 public class TestAddIndexes extends LuceneTestCase {
@@ -1014,5 +1018,30 @@
       setFieldCodec("content", mockSepCodec.name);
     }
   }
+
+  // LUCENE-2790: tests that the non CFS files were deleted by addIndexes
+  public void testNonCFSLeftovers() throws Exception {
+    Directory[] dirs = new Directory[2];
+    for (int i = 0; i < dirs.length; i++) {
+      dirs[i] = new RAMDirectory();
+      IndexWriter w = new IndexWriter(dirs[i], new IndexWriterConfig(Version.LUCENE_40, new MockAnalyzer()));
+      Document d = new Document();
+      d.add(new Field("c", "v", Store.YES, Index.ANALYZED, TermVector.YES));
+      w.addDocument(d);
+      w.close();
+    }
+
+    IndexReader[] readers = new IndexReader[] { IndexReader.open(dirs[0]), IndexReader.open(dirs[1]) };
+
+    Directory dir = new RAMDirectory();
+    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_40, new MockAnalyzer());
+    LogMergePolicy lmp = (LogMergePolicy) conf.getMergePolicy();
+    lmp.setNoCFSRatio(1.0); // Force creation of CFS
+    IndexWriter w3 = new IndexWriter(dir, conf);
+    w3.addIndexes(readers);
+    w3.close();
+
+    assertEquals("Only one compound segment should exist", 3, dir.listAll().length);
+  }
 }
Index: lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
===================================================================
--- lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java	(revision 1041020)
+++ lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java	(working copy)
@@ -527,12 +527,15 @@
     try {
       Directory dir = FSDirectory.open(new File(fullDir(outputDir)));
 
+      LogMergePolicy mergePolicy = newLogMergePolicy(true, 10);
+      mergePolicy.setNoCFSRatio(1); // This test expects all of its segments to be in CFS
+
       IndexWriter writer = new IndexWriter(
           dir,
           newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).
               setMaxBufferedDocs(-1).
               setRAMBufferSizeMB(16.0).
-              setMergePolicy(newLogMergePolicy(true, 10))
+              setMergePolicy(mergePolicy)
       );
       for(int i=0;i<35;i++) {
         addDoc(writer, i);
Index: lucene/src/java/org/apache/lucene/index/MergePolicy.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/MergePolicy.java	(revision 1041020)
+++ lucene/src/java/org/apache/lucene/index/MergePolicy.java	(working copy)
@@ -76,16 +76,14 @@
     SegmentReader[] readers;        // used by IndexWriter
     SegmentReader[] readersClone;   // used by IndexWriter
     public final SegmentInfos segments;
-    public final boolean useCompoundFile;
     boolean aborted;
     Throwable error;
     boolean paused;
 
-    public OneMerge(SegmentInfos segments, boolean useCompoundFile) {
+    public OneMerge(SegmentInfos segments) {
       if (0 == segments.size())
         throw new RuntimeException("segments must include at least one segment");
       this.segments = segments;
-      this.useCompoundFile = useCompoundFile;
     }
 
     /** Record that an exception occurred while executing
@@ -314,10 +312,9 @@
   public abstract void close();
 
   /**
-   * Returns true if a newly flushed (not from merge)
-   * segment should use the compound file format.
+   * Returns true if a new segment (regardless of its origin) should use the compound file format.
    */
-  public abstract boolean useCompoundFile(SegmentInfos segments, SegmentInfo newSegment);
+  public abstract boolean useCompoundFile(SegmentInfos segments, SegmentInfo newSegment) throws IOException;
 
   /**
    * Returns true if the doc store files should use the
Index: lucene/src/java/org/apache/lucene/index/LogMergePolicy.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/LogMergePolicy.java	(revision 1041020)
+++ lucene/src/java/org/apache/lucene/index/LogMergePolicy.java	(working copy)
@@ -127,8 +127,21 @@
 
   // Javadoc inherited
   @Override
-  public boolean useCompoundFile(SegmentInfos infos, SegmentInfo info) {
-    return useCompoundFile;
+  public boolean useCompoundFile(SegmentInfos infos, SegmentInfo mergedInfo) throws IOException {
+    final boolean doCFS;
+
+    if (!useCompoundFile) {
+      doCFS = false;
+    } else if (noCFSRatio == 1.0) {
+      doCFS = true;
+    } else {
+      long totalSize = 0;
+      for (SegmentInfo info : infos)
+        totalSize += size(info);
+
+      doCFS = size(mergedInfo) <= noCFSRatio * totalSize;
+    }
+    return doCFS;
   }
 
   /** Sets whether compound file format should be used for
@@ -254,12 +267,12 @@
         // unless there is only 1 which is optimized.
         if (last - start - 1 > 1 || (start != last - 1 && !isOptimized(infos.info(start + 1)))) {
           // there is more than 1 segment to the right of this one, or an unoptimized single segment.
-          spec.add(makeOneMerge(infos, infos.range(start + 1, last)));
+          spec.add(new OneMerge(infos.range(start + 1, last)));
         }
         last = start;
       } else if (last - start == mergeFactor) {
         // mergeFactor eligible segments were found, add them as a merge.
-        spec.add(makeOneMerge(infos, infos.range(start, last)));
+        spec.add(new OneMerge(infos.range(start, last)));
         last = start;
       }
       --start;
@@ -267,7 +280,7 @@
 
     // Add any left-over segments, unless there is just 1 already optimized.
     if (last > 0 && (++start + 1 < last || !isOptimized(infos.info(start)))) {
-      spec.add(makeOneMerge(infos, infos.range(start, last)));
+      spec.add(new OneMerge(infos.range(start, last)));
     }
 
     return spec.merges.size() == 0 ? null : spec;
@@ -284,7 +297,7 @@
     // First, enroll all "full" merges (size
    // mergeFactor) to potentially be run concurrently:
     while (last - maxNumSegments + 1 >= mergeFactor) {
-      spec.add(makeOneMerge(infos, infos.range(last-mergeFactor, last)));
+      spec.add(new OneMerge(infos.range(last - mergeFactor, last)));
       last -= mergeFactor;
     }
 
@@ -296,7 +309,7 @@
         // Since we must optimize down to 1 segment, the
         // choice is simple:
         if (last > 1 || !isOptimized(infos.info(0))) {
-          spec.add(makeOneMerge(infos, infos.range(0, last)));
+          spec.add(new OneMerge(infos.range(0, last)));
         }
       } else if (last > maxNumSegments) {
@@ -325,7 +338,7 @@
           }
         }
 
-        spec.add(makeOneMerge(infos, infos.range(bestStart, bestStart+finalMergeSize)));
+        spec.add(new OneMerge(infos.range(bestStart, bestStart + finalMergeSize)));
       }
     }
     return spec.merges.size() == 0 ? null : spec;
@@ -413,7 +426,7 @@
           // deletions, so force a merge now:
           if (verbose())
             message("  add merge " + firstSegmentWithDeletions + " to " + (i-1) + " inclusive");
-          spec.add(makeOneMerge(segmentInfos, segmentInfos.range(firstSegmentWithDeletions, i)));
+          spec.add(new OneMerge(segmentInfos.range(firstSegmentWithDeletions, i)));
           firstSegmentWithDeletions = i;
         }
       } else if (firstSegmentWithDeletions != -1) {
@@ -422,7 +435,7 @@
        // ... mergeFactor segments
         if (verbose())
           message("  add merge " + firstSegmentWithDeletions + " to " + (i-1) + " inclusive");
-        spec.add(makeOneMerge(segmentInfos, segmentInfos.range(firstSegmentWithDeletions, i)));
+        spec.add(new OneMerge(segmentInfos.range(firstSegmentWithDeletions, i)));
         firstSegmentWithDeletions = -1;
       }
     }
@@ -430,7 +443,7 @@
     if (firstSegmentWithDeletions != -1) {
       if (verbose())
         message("  add merge " + firstSegmentWithDeletions + " to " + (numSegments-1) + " inclusive");
-      spec.add(makeOneMerge(segmentInfos, segmentInfos.range(firstSegmentWithDeletions, numSegments)));
+      spec.add(new OneMerge(segmentInfos.range(firstSegmentWithDeletions, numSegments)));
     }
 
     return spec;
@@ -530,7 +543,7 @@
           spec = new MergeSpecification();
         if (verbose())
           message("    " + start + " to " + end + ": add this merge");
-        spec.add(makeOneMerge(infos, infos.range(start, end)));
+        spec.add(new OneMerge(infos.range(start, end)));
       } else if (verbose())
         message("    " + start + " to " + end + ": contains segment over maxMergeSize or maxMergeDocs; skipping");
@@ -544,29 +557,6 @@
     return spec;
   }
 
-  protected OneMerge makeOneMerge(SegmentInfos infos, SegmentInfos infosToMerge) throws IOException {
-    final boolean doCFS;
-    if (!useCompoundFile) {
-      doCFS = false;
-    } else if (noCFSRatio == 1.0) {
-      doCFS = true;
-    } else {
-
-      long totSize = 0;
-      for(SegmentInfo info : infos) {
-        totSize += size(info);
-      }
-      long mergeSize = 0;
-      for(SegmentInfo info : infosToMerge) {
-        mergeSize += size(info);
-      }
-
-      doCFS = mergeSize <= noCFSRatio * totSize;
-    }
-
-    return new OneMerge(infosToMerge, doCFS);
-  }
-
   /** Determines the largest segment (measured by
    * document count) that may be merged with other segments.
    * Small values (e.g., less than 10,000) are best for
Index: lucene/src/java/org/apache/lucene/index/IndexWriter.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/IndexWriter.java (revision 1041020)
+++ lucene/src/java/org/apache/lucene/index/IndexWriter.java (working copy)
@@ -50,13 +50,13 @@
An IndexWriter creates and maintains an index.
The create argument to the {@link
- #IndexWriter(Directory, Analyzer, boolean, MaxFieldLength) constructor} determines
+ #IndexWriter(Directory, IndexWriterConfig) constructor} determines
whether a new index is created, or whether an existing index is
opened. Note that you can open an index with create=true
even while readers are using the index. The old readers will
continue to search the "point in time" snapshot they had opened,
and won't see the newly created index until they re-open. There are
- also {@link #IndexWriter(Directory, Analyzer, MaxFieldLength) constructors}
+ also {@link #IndexWriter(Directory, IndexWriterConfig) constructors}
with no create argument which will create a new index
if there is not already an index at the provided path and otherwise
open the existing index.
   These changes are buffered in memory and periodically
   flushed to the {@link Directory} (during the above method
   calls). A flush is triggered when there are enough
-  buffered deletes (see {@link #setMaxBufferedDeleteTerms})
+  buffered deletes (see {@link IndexWriterConfig#setMaxBufferedDeleteTerms})
   or enough added documents since the last flush, whichever
   is sooner. For the added documents, flushing is triggered
   either by RAM usage of the documents (see {@link
-  #setRAMBufferSizeMB}) or the number of added documents.
+  IndexWriterConfig#setRAMBufferSizeMB}) or the number of added documents.
   The default is to flush when RAM usage hits 16 MB. For
   best indexing speed you should flush by RAM usage with a
   large RAM buffer. Note that flushing just moves the
@@ -1248,8 +1248,8 @@
 
   /**
    * Adds a document to this index. If the document contains more than
-   * {@link #setMaxFieldLength(int)} terms for a given field, the remainder are
-   * discarded.
+   * {@link IndexWriterConfig#setMaxFieldLength(int)} terms for a given field,
+   * the remainder are discarded.
    *
    * Note that if an Exception is hit (for example disk full)
    * then the index will be consistent, but this document
@@ -1297,7 +1297,7 @@
   /**
    * Adds a document to this index, using the provided analyzer instead of the
    * value of {@link #getAnalyzer()}. If the document contains more than
-   * {@link #setMaxFieldLength(int)} terms for a given field, the remainder are
+   * {@link IndexWriterConfig#setMaxFieldLength(int)} terms for a given field, the remainder are
    * discarded.
    *
    * See {@link #addDocument(Document)} for details on
@@ -1603,7 +1603,7 @@
*
* @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error
- * @see LogMergePolicy#findMergesForOptimize
+ * @see MergePolicy#findMergesForOptimize
*/
public void optimize() throws CorruptIndexException, IOException {
optimize(true);
@@ -2282,8 +2282,7 @@
* @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error
*/
- public void addIndexes(IndexReader... readers)
- throws CorruptIndexException, IOException {
+ public void addIndexes(IndexReader... readers) throws CorruptIndexException, IOException {
ensureOpen();
try {
@@ -2296,48 +2295,34 @@
int docCount = merger.merge(); // merge 'em
- SegmentInfo info = null;
+ SegmentInfo info = new SegmentInfo(mergedName, docCount, directory,
+ false, -1, null, false, merger.hasProx(), merger.getSegmentCodecs());
+ setDiagnostics(info, "addIndexes(IndexReader...)");
+
+ boolean useCompoundFile;
+ synchronized(this) { // Guard segmentInfos
+ useCompoundFile = mergePolicy.useCompoundFile(segmentInfos, info);
+ }
+
+ // Now create the compound file if needed
+ if (useCompoundFile) {
+ merger.createCompoundFile(mergedName + ".cfs", info);
+ info.setUseCompoundFile(true);
+
+ // delete new non cfs files directly: they were never
+ // registered with IFD
+ deleter.deleteNewFiles(merger.getMergedFiles(info));
+ }
+
+ // Register the new segment
synchronized(this) {
- info = new SegmentInfo(mergedName, docCount, directory, false, -1,
- null, false, merger.hasProx(), merger.getSegmentCodecs());
- setDiagnostics(info, "addIndexes(IndexReader...)");
segmentInfos.add(info);
- checkpoint();
// Notify DocumentsWriter that the flushed count just increased
docWriter.updateFlushedDocCount(docCount);
+
+ checkpoint();
}
-
- // Now create the compound file if needed
- if (mergePolicy instanceof LogMergePolicy && ((LogMergePolicy) mergePolicy).getUseCompoundFile()) {
-
- List