Index: lucene/CHANGES.txt
===================================================================
--- lucene/CHANGES.txt (revision 1042278)
+++ lucene/CHANGES.txt (working copy)
@@ -113,6 +113,10 @@
   throw IllegalArgumentException if nDocs is 0. Instead, you should use the
   newly added TotalHitCountCollector. (Mike McCandless)
 
+* LUCENE-2790: LogMergePolicy.useCompoundFile's logic now factors in noCFSRatio
+  to determine whether the passed in segment should be compound.
+  (Shai Erera, Earwin Burrfoot)
+
 API Changes
 
 * LUCENE-2076: Rename FSDirectory.getFile -> getDirectory.  (George
Index: lucene/src/test/org/apache/lucene/index/TestIndexWriter.java
===================================================================
--- lucene/src/test/org/apache/lucene/index/TestIndexWriter.java (revision 1042278)
+++ lucene/src/test/org/apache/lucene/index/TestIndexWriter.java (working copy)
@@ -2473,8 +2473,10 @@
 
     for(int iter=0;iter<2;iter++) {
       Directory dir = newDirectory();
-      IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)));
-      ((LogMergePolicy) w.getMergePolicy()).setUseCompoundFile(true);
+      IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT));
+      ((LogMergePolicy) conf.getMergePolicy()).setUseCompoundFile(true);
+      ((LogMergePolicy) conf.getMergePolicy()).setNoCFSRatio(1.0);
+      IndexWriter w = new IndexWriter(dir, conf);
       Document doc = new Document();
       doc.add(newField("field", "go", Field.Store.NO, Field.Index.ANALYZED));
       w.addDocument(doc);
@@ -2568,6 +2570,7 @@
     public FlushCountingIndexWriter(Directory dir, IndexWriterConfig iwc) throws IOException {
       super(dir, iwc);
     }
+    @Override
     public void doAfterFlush() {
       flushCount++;
     }
Index: lucene/src/test/org/apache/lucene/index/TestIndexFileDeleter.java
===================================================================
--- lucene/src/test/org/apache/lucene/index/TestIndexFileDeleter.java (revision 1042278)
+++ lucene/src/test/org/apache/lucene/index/TestIndexFileDeleter.java (working copy)
@@ -40,11 +40,13 @@
   public void testDeleteLeftoverFiles() throws IOException {
     MockDirectoryWrapper dir = newDirectory();
     dir.setPreventDoubleWrite(false);
-    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
+    IndexWriterConfig conf = newIndexWriterConfig(
         TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT))
-        .setMaxBufferedDocs(10));
-    ((LogMergePolicy) writer.getMergePolicy()).setMergeFactor(10);
-    ((LogMergePolicy) writer.getMergePolicy()).setUseCompoundFile(true);
+        .setMaxBufferedDocs(10);
+    ((LogMergePolicy) conf.getMergePolicy()).setMergeFactor(10);
+    ((LogMergePolicy) conf.getMergePolicy()).setUseCompoundFile(true);
+    ((LogMergePolicy) conf.getMergePolicy()).setNoCFSRatio(1.0);
+    IndexWriter writer = new IndexWriter(dir, conf);
     int i;
     for(i=0;i<35;i++) {
       addDoc(writer, i);
Index: lucene/src/test/org/apache/lucene/index/TestAddIndexes.java
===================================================================
--- lucene/src/test/org/apache/lucene/index/TestAddIndexes.java (revision 1042278)
+++ lucene/src/test/org/apache/lucene/index/TestAddIndexes.java (working copy)
@@ -27,6 +27,9 @@
 import org.apache.lucene.analysis.WhitespaceAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
+import org.apache.lucene.document.Field.Index;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.document.Field.TermVector;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
 import org.apache.lucene.store.AlreadyClosedException;
 import org.apache.lucene.store.Directory;
@@ -885,4 +888,30 @@
     assertTrue(c.failures.size() == 0);
   }
 
+  // LUCENE-2790: tests that the non CFS files were deleted by addIndexes
+  public void testNonCFSLeftovers() throws Exception {
+    Directory[] dirs = new Directory[2];
+    for (int i = 0; i < dirs.length; i++) {
+      dirs[i] = new RAMDirectory();
+      IndexWriter w = new IndexWriter(dirs[i], new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+      Document d = new Document();
+      d.add(new Field("c", "v", Store.YES, Index.ANALYZED, TermVector.YES));
+      w.addDocument(d);
+      w.close();
+    }
+
+    IndexReader[] readers = new IndexReader[] { IndexReader.open(dirs[0]), IndexReader.open(dirs[1]) };
+
+    Directory dir = new RAMDirectory();
+    IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer());
+    LogMergePolicy lmp = (LogMergePolicy) conf.getMergePolicy();
+    lmp.setNoCFSRatio(1.0); // Force creation of CFS
+    IndexWriter w3 = new IndexWriter(dir, conf);
+    w3.addIndexes(readers);
+    w3.close();
+
+    assertEquals("Only one compound segment should exist", 3, dir.listAll().length);
+  }
+
 }
Index: lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
===================================================================
--- lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java (revision 1042278)
+++ lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java (working copy)
@@ -519,6 +519,9 @@
     IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setMaxBufferedDocs(10);
     ((LogMergePolicy) conf.getMergePolicy()).setUseCompoundFile(doCFS);
     ((LogMergePolicy) conf.getMergePolicy()).setUseCompoundDocStore(doCFS);
+    if (doCFS) {
+      ((LogMergePolicy) conf.getMergePolicy()).setNoCFSRatio(1.0);
+    }
     IndexWriter writer = new IndexWriter(dir, conf);
 
     for(int i=0;i<35;i++) {
@@ -556,9 +559,11 @@
     try {
       Directory dir = FSDirectory.open(new File(fullDir(outputDir)));
 
-      IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setMaxBufferedDocs(-1).setRAMBufferSizeMB(16.0));
-      ((LogMergePolicy) writer.getMergePolicy()).setUseCompoundFile(true);
-      ((LogMergePolicy) writer.getMergePolicy()).setMergeFactor(10);
+      IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setMaxBufferedDocs(-1).setRAMBufferSizeMB(16.0);
+      ((LogMergePolicy) conf.getMergePolicy()).setUseCompoundFile(true);
+      ((LogMergePolicy) conf.getMergePolicy()).setMergeFactor(10);
+      ((LogMergePolicy) conf.getMergePolicy()).setNoCFSRatio(1.0);
+      IndexWriter writer = new IndexWriter(dir, conf);
       for(int i=0;i<35;i++) {
         addDoc(writer, i);
       }
Index: lucene/src/java/org/apache/lucene/index/MergePolicy.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/MergePolicy.java (revision 1042278)
+++ lucene/src/java/org/apache/lucene/index/MergePolicy.java (working copy)
@@ -76,16 +76,14 @@
     SegmentReader[] readers;        // used by IndexWriter
     SegmentReader[] readersClone;   // used by IndexWriter
     public final SegmentInfos segments;
-    public final boolean useCompoundFile;
     boolean aborted;
     Throwable error;
     boolean paused;
 
-    public OneMerge(SegmentInfos segments, boolean useCompoundFile) {
+    public OneMerge(SegmentInfos segments) {
       if (0 == segments.size())
         throw new RuntimeException("segments must include at least one segment");
       this.segments = segments;
-      this.useCompoundFile = useCompoundFile;
     }
 
     /** Record that an exception occurred while executing
@@ -313,11 +311,8 @@
    */
   public abstract void close();
 
-  /**
-   * Returns true if a newly flushed (not from merge)
-   * segment should use the compound file format.
-   */
-  public abstract boolean useCompoundFile(SegmentInfos segments, SegmentInfo newSegment);
+  /** Returns true if a new segment (regardless of its origin) should use the compound file format. */
+  public abstract boolean useCompoundFile(SegmentInfos segments, SegmentInfo newSegment) throws IOException;
 
   /**
    * Returns true if the doc store files should use the
Index: lucene/src/java/org/apache/lucene/index/LogMergePolicy.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/LogMergePolicy.java (revision 1042278)
+++ lucene/src/java/org/apache/lucene/index/LogMergePolicy.java (working copy)
@@ -127,8 +127,21 @@
 
   // Javadoc inherited
   @Override
-  public boolean useCompoundFile(SegmentInfos infos, SegmentInfo info) {
-    return useCompoundFile;
+  public boolean useCompoundFile(SegmentInfos infos, SegmentInfo mergedInfo) throws IOException {
+    final boolean doCFS;
+
+    if (!useCompoundFile) {
+      doCFS = false;
+    } else if (noCFSRatio == 1.0) {
+      doCFS = true;
+    } else {
+      long totalSize = 0;
+      for (SegmentInfo info : infos)
+        totalSize += size(info);
+
+      doCFS = size(mergedInfo) <= noCFSRatio * totalSize;
+    }
+    return doCFS;
   }
 
   /** Sets whether compound file format should be used for
@@ -254,12 +267,12 @@
         // unless there is only 1 which is optimized.
         if (last - start - 1 > 1 || (start != last - 1 && !isOptimized(infos.info(start + 1)))) {
           // there is more than 1 segment to the right of this one, or an unoptimized single segment.
-          spec.add(makeOneMerge(infos, infos.range(start + 1, last)));
+          spec.add(new OneMerge(infos.range(start + 1, last)));
         }
         last = start;
       } else if (last - start == mergeFactor) {
         // mergeFactor eligible segments were found, add them as a merge.
-        spec.add(makeOneMerge(infos, infos.range(start, last)));
+        spec.add(new OneMerge(infos.range(start, last)));
         last = start;
       }
       --start;
@@ -267,7 +280,7 @@
 
     // Add any left-over segments, unless there is just 1 already optimized.
     if (last > 0 && (++start + 1 < last || !isOptimized(infos.info(start)))) {
-      spec.add(makeOneMerge(infos, infos.range(start, last)));
+      spec.add(new OneMerge(infos.range(start, last)));
     }
 
     return spec.merges.size() == 0 ? null : spec;
@@ -284,7 +297,7 @@
 
     // First, enroll all "full" merges (size
     // mergeFactor) to potentially be run concurrently:
     while (last - maxNumSegments + 1 >= mergeFactor) {
-      spec.add(makeOneMerge(infos, infos.range(last-mergeFactor, last)));
+      spec.add(new OneMerge(infos.range(last-mergeFactor, last)));
       last -= mergeFactor;
     }
@@ -296,7 +309,7 @@
 
       // Since we must optimize down to 1 segment, the
       // choice is simple:
       if (last > 1 || !isOptimized(infos.info(0))) {
-        spec.add(makeOneMerge(infos, infos.range(0, last)));
+        spec.add(new OneMerge(infos.range(0, last)));
       }
     } else if (last > maxNumSegments) {
@@ -325,7 +338,7 @@
         }
       }
 
-      spec.add(makeOneMerge(infos, infos.range(bestStart, bestStart+finalMergeSize)));
+      spec.add(new OneMerge(infos.range(bestStart, bestStart+finalMergeSize)));
     }
   }
   return spec.merges.size() == 0 ? null : spec;
@@ -413,7 +426,7 @@
           // deletions, so force a merge now:
           if (verbose())
             message("  add merge " + firstSegmentWithDeletions + " to " + (i-1) + " inclusive");
-          spec.add(makeOneMerge(segmentInfos, segmentInfos.range(firstSegmentWithDeletions, i)));
+          spec.add(new OneMerge(segmentInfos.range(firstSegmentWithDeletions, i)));
          firstSegmentWithDeletions = i;
         }
       } else if (firstSegmentWithDeletions != -1) {
@@ -422,7 +435,7 @@
         // End of a sequence of segments with deletions, so,
         // merge those past segments even if it's fewer than
         // mergeFactor segments
         if (verbose())
           message("  add merge " + firstSegmentWithDeletions + " to " + (i-1) + " inclusive");
-        spec.add(makeOneMerge(segmentInfos, segmentInfos.range(firstSegmentWithDeletions, i)));
+        spec.add(new OneMerge(segmentInfos.range(firstSegmentWithDeletions, i)));
         firstSegmentWithDeletions = -1;
       }
     }
@@ -430,7 +443,7 @@
 
     if (firstSegmentWithDeletions != -1) {
       if (verbose())
         message("  add merge " + firstSegmentWithDeletions + " to " + (numSegments-1) + " inclusive");
-      spec.add(makeOneMerge(segmentInfos, segmentInfos.range(firstSegmentWithDeletions, numSegments)));
+      spec.add(new OneMerge(segmentInfos.range(firstSegmentWithDeletions, numSegments)));
     }
 
     return spec;
@@ -530,7 +543,7 @@
           spec = new MergeSpecification();
         if (verbose())
           message("    " + start + " to " + end + ": add this merge");
-        spec.add(makeOneMerge(infos, infos.range(start, end)));
+        spec.add(new OneMerge(infos.range(start, end)));
       } else if (verbose())
         message("    " + start + " to " + end + ": contains segment over maxMergeSize or maxMergeDocs; skipping");
@@ -544,29 +557,6 @@
     return spec;
   }
 
-  protected OneMerge makeOneMerge(SegmentInfos infos, SegmentInfos infosToMerge) throws IOException {
-    final boolean doCFS;
-    if (!useCompoundFile) {
-      doCFS = false;
-    } else if (noCFSRatio == 1.0) {
-      doCFS = true;
-    } else {
-
-      long totSize = 0;
-      for(SegmentInfo info : infos) {
-        totSize += size(info);
-      }
-      long mergeSize = 0;
-      for(SegmentInfo info : infosToMerge) {
-        mergeSize += size(info);
-      }
-
-      doCFS = mergeSize <= noCFSRatio * totSize;
-    }
-
-    return new OneMerge(infosToMerge, doCFS);
-  }
-
   /** Determines the largest segment (measured by
    * document count) that may be merged with other segments.
    * Small values (e.g., less than 10,000) are best for
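
Editorial note, not part of the patch: the CFS decision above previously lived in the removed
makeOneMerge helper and was made once per OneMerge; moving it into useCompoundFile applies it to
any new segment, as the updated MergePolicy javadoc says. A minimal sketch of the resulting
behavior, assuming an existing IndexWriterConfig named conf and illustrative sizes:

    // The policy compares the candidate segment against the total index size:
    //   doCFS = size(mergedInfo) <= noCFSRatio * totalSize
    // e.g. with noCFSRatio = 0.1 and a 100 MB index, a 5 MB segment is written
    // as a .cfs while a 20 MB segment stays as separate per-extension files.
    LogMergePolicy lmp = (LogMergePolicy) conf.getMergePolicy();
    lmp.setUseCompoundFile(true); // master switch; false short-circuits to non-compound
    lmp.setNoCFSRatio(1.0);       // 1.0 forces CFS for every segment, as the updated tests do
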
Index: lucene/src/java/org/apache/lucene/index/IndexWriter.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/IndexWriter.java (revision 1042278)
+++ lucene/src/java/org/apache/lucene/index/IndexWriter.java (working copy)
@@ -2377,7 +2377,7 @@
    *
    * @throws CorruptIndexException if the index is corrupt
    * @throws IOException if there is a low-level IO error
-   * @see LogMergePolicy#findMergesForOptimize
+   * @see MergePolicy#findMergesForOptimize
    */
   public void optimize() throws CorruptIndexException, IOException {
     optimize(true);
@@ -2957,48 +2957,35 @@
       int docCount = merger.merge();                // merge 'em
-      SegmentInfo info = null;
+      SegmentInfo info = new SegmentInfo(mergedName, docCount, directory,
+                                         false, true, -1, null, false, merger.hasProx());
+      setDiagnostics(info, "addIndexes(IndexReader...)");
+
+      boolean useCompoundFile;
+      synchronized(this) { // Guard segmentInfos
+        useCompoundFile = mergePolicy.useCompoundFile(segmentInfos, info);
+      }
+
+      // Now create the compound file if needed
+      if (useCompoundFile) {
+        merger.createCompoundFile(mergedName + ".cfs");
+        info.setUseCompoundFile(true);
+
+        // delete new non cfs files directly: they were never
+        // registered with IFD
+        deleter.deleteNewFiles(merger.getMergedFiles());
+      }
+
+      // Register the new segment
       synchronized(this) {
-        info = new SegmentInfo(mergedName, docCount, directory, false, true,
-                               -1, null, false, merger.hasProx());
-        setDiagnostics(info, "addIndexes(IndexReader...)");
         segmentInfos.add(info);
-        checkpoint();
         // Notify DocumentsWriter that the flushed count just increased
         docWriter.updateFlushedDocCount(docCount);
-      }
-
-      // Now create the compound file if needed
-      if (mergePolicy instanceof LogMergePolicy && getUseCompoundFile()) {
-        List
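
Editorial note, not part of the patch (the hunk above is truncated mid-line in this copy):
condensed, the reworked addIndexes(IndexReader...) flow introduced here is: build the SegmentInfo
and its diagnostics up front, ask the merge policy for the CFS decision while holding the writer
lock (useCompoundFile reads segmentInfos), build the compound file outside the lock, delete the
no-longer-needed non-CFS files (they were never registered with the IndexFileDeleter), and only
then publish the segment. A sketch, with error handling omitted:

    SegmentInfo info = new SegmentInfo(mergedName, docCount, directory,
                                       false, true, -1, null, false, merger.hasProx());
    setDiagnostics(info, "addIndexes(IndexReader...)");

    boolean useCompoundFile;
    synchronized (this) {                 // segmentInfos may change concurrently
      useCompoundFile = mergePolicy.useCompoundFile(segmentInfos, info);
    }

    if (useCompoundFile) {
      merger.createCompoundFile(mergedName + ".cfs");  // built outside the writer lock
      info.setUseCompoundFile(true);
      deleter.deleteNewFiles(merger.getMergedFiles()); // non-CFS files were never registered with IFD
    }

    synchronized (this) {
      segmentInfos.add(info);             // publish only once the segment's files are final
      docWriter.updateFlushedDocCount(docCount);
    }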