Index: lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java
--- lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java	Sat Nov 20 09:40:54 2010 -0500
+++ lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java	Sat Nov 20 10:59:12 2010 -0500
@@ -17,42 +17,43 @@
 
 package org.apache.lucene.benchmark.byTask;
 
-import java.io.StringReader;
+import java.io.BufferedReader;
 import java.io.File;
 import java.io.FileReader;
-import java.io.BufferedReader;
+import java.io.StringReader;
 import java.text.Collator;
 import java.util.List;
 import java.util.Locale;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.benchmark.BenchmarkTestCase;
 import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
 import org.apache.lucene.benchmark.byTask.feeds.ReutersQueryMaker;
+import org.apache.lucene.benchmark.byTask.stats.TaskStats;
+import org.apache.lucene.benchmark.byTask.tasks.CountingHighlighterTestTask;
 import org.apache.lucene.benchmark.byTask.tasks.CountingSearchTestTask;
-import org.apache.lucene.benchmark.byTask.tasks.CountingHighlighterTestTask;
-import org.apache.lucene.benchmark.byTask.stats.TaskStats;
 import org.apache.lucene.collation.CollationKeyAnalyzer;
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.FieldsEnum;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig.OpenMode;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.LogDocMergePolicy;
+import org.apache.lucene.index.LogMergePolicy;
+import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.SegmentInfos;
+import org.apache.lucene.index.SerialMergeScheduler;
+import org.apache.lucene.index.TermFreqVector;
 import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.index.MultiFields;
-import org.apache.lucene.index.FieldsEnum;
-import org.apache.lucene.index.DocsEnum;
-import org.apache.lucene.index.IndexWriterConfig;
-import org.apache.lucene.index.LogMergePolicy;
-import org.apache.lucene.index.SerialMergeScheduler;
-import org.apache.lucene.index.LogDocMergePolicy;
-import org.apache.lucene.index.TermFreqVector;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.index.IndexWriterConfig.OpenMode;
-import org.apache.lucene.store.Directory;
 import org.apache.lucene.search.FieldCache.DocTermsIndex;
 import org.apache.lucene.search.FieldCache;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
 
 /**
  * Test very simply that perf tasks - simple algorithms - are doing what they should.
@@ -809,12 +810,9 @@
     ir.close();
 
     // Make sure we have 3 segments:
-    final String[] files = benchmark.getRunData().getDirectory().listAll();
-    int cfsCount = 0;
-    for(int i=0;i<files.length;i++)
-      if (files[i].endsWith(".cfs"))
-        cfsCount++;
-    assertEquals(3, cfsCount);
+    SegmentInfos infos = new SegmentInfos();
+    infos.read(benchmark.getRunData().getDirectory());
+    assertEquals(3, infos.size());
   }
Index: lucene/src/java/org/apache/lucene/index/LogMergePolicy.java
--- lucene/src/java/org/apache/lucene/index/LogMergePolicy.java	Sat Nov 20 09:40:54 2010 -0500
+++ lucene/src/java/org/apache/lucene/index/LogMergePolicy.java	Sat Nov 20 10:59:12 2010 -0500
@@ -52,12 +52,19 @@
   /** Default maximum segment size.  A segment of this size
    *  or larger will never be merged.  @see setMaxMergeDocs */
   public static final int DEFAULT_MAX_MERGE_DOCS = Integer.MAX_VALUE;
 
+  /** Default noCFSRatio.  If a merge's size is >= 10% of
+   *  the index, then we disable compound file for it.
+   *  @see #setNoCFSRatio */
+  public static final double DEFAULT_NO_CFS_RATIO = 0.1;
+
   protected int mergeFactor = DEFAULT_MERGE_FACTOR;
 
   protected long minMergeSize;
   protected long maxMergeSize;
   protected int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS;
+  protected double noCFSRatio = DEFAULT_NO_CFS_RATIO;
+
   protected boolean calibrateSizeByDeletes = true;
 
   protected boolean useCompoundFile = true;
@@ -73,6 +80,23 @@
     IndexWriter w = writer.get();
     return w != null && w.verbose();
   }
+
+  /** @see #setNoCFSRatio */
+  public double getNoCFSRatio() {
+    return noCFSRatio;
+  }
+
+  /** If a merged segment will be more than this percentage
+   *  of the total size of the index, leave the segment as
+   *  non-compound file even if compound file is enabled.
+   *  Set to 1.0 to always use CFS regardless of merge
+   *  size. */
+  public void setNoCFSRatio(double noCFSRatio) {
+    if (noCFSRatio < 0.0 || noCFSRatio > 1.0) {
+      throw new IllegalArgumentException("noCFSRatio must be 0.0 to 1.0 inclusive; got " + noCFSRatio);
+    }
+    this.noCFSRatio = noCFSRatio;
+  }
 
   protected void message(String message) {
     if (verbose())
@@ -207,7 +231,8 @@
     return !hasDeletions &&
       !info.hasSeparateNorms() &&
       info.dir == w.getDirectory() &&
-      info.getUseCompoundFile() == useCompoundFile;
+      (info.getUseCompoundFile() == useCompoundFile ||
+       (noCFSRatio != 1.0 && !info.getUseCompoundFile()));
   }
 
   /**
@@ -230,12 +255,12 @@
 
         // unless there is only 1 which is optimized.
         if (last - start - 1 > 1 || (start != last - 1 && !isOptimized(infos.info(start + 1)))) {
           // there is more than 1 segment to the right of this one, or an unoptimized single segment.
-          spec.add(new OneMerge(infos.range(start + 1, last), useCompoundFile));
+          spec.add(makeOneMerge(infos, infos.range(start + 1, last)));
           last = start;
         } else if (last - start == mergeFactor) {
           // mergeFactor eligible segments were found, add them as a merge.
-          spec.add(new OneMerge(infos.range(start, last), useCompoundFile));
+          spec.add(makeOneMerge(infos, infos.range(start, last)));
           last = start;
         }
         --start;
@@ -243,7 +268,7 @@
 
     // Add any left-over segments, unless there is just 1 already optimized.
     if (last > 0 && (++start + 1 < last || !isOptimized(infos.info(start)))) {
-      spec.add(new OneMerge(infos.range(start, last), useCompoundFile));
+      spec.add(makeOneMerge(infos, infos.range(start, last)));
     }
 
     return spec.merges.size() == 0 ? null : spec;
@@ -260,7 +285,7 @@
 
     // First, enroll all "full" merges (size
    // mergeFactor) to potentially be run concurrently:
     while (last - maxNumSegments + 1 >= mergeFactor) {
-      spec.add(new OneMerge(infos.range(last-mergeFactor, last), useCompoundFile));
+      spec.add(makeOneMerge(infos, infos.range(last-mergeFactor, last)));
       last -= mergeFactor;
     }
@@ -272,7 +297,7 @@
 
       // Since we must optimize down to 1 segment, the
       // choice is simple:
       if (last > 1 || !isOptimized(infos.info(0))) {
-        spec.add(new OneMerge(infos.range(0, last), useCompoundFile));
+        spec.add(makeOneMerge(infos, infos.range(0, last)));
       }
     } else if (last > maxNumSegments) {
@@ -301,7 +326,7 @@
           }
         }
 
-        spec.add(new OneMerge(infos.range(bestStart, bestStart+finalMergeSize), useCompoundFile));
+        spec.add(makeOneMerge(infos, infos.range(bestStart, bestStart+finalMergeSize)));
       }
     }
     return spec.merges.size() == 0 ? null : spec;
@@ -389,7 +414,7 @@
         // deletions, so force a merge now:
         if (verbose())
           message("  add merge " + firstSegmentWithDeletions + " to " + (i-1) + " inclusive");
-        spec.add(new OneMerge(segmentInfos.range(firstSegmentWithDeletions, i), useCompoundFile));
+        spec.add(makeOneMerge(segmentInfos, segmentInfos.range(firstSegmentWithDeletions, i)));
         firstSegmentWithDeletions = i;
       }
     } else if (firstSegmentWithDeletions != -1) {
@@ -398,7 +423,7 @@
       // mergeFactor segments
       if (verbose())
         message("  add merge " + firstSegmentWithDeletions + " to " + (i-1) + " inclusive");
-      spec.add(new OneMerge(segmentInfos.range(firstSegmentWithDeletions, i), useCompoundFile));
+      spec.add(makeOneMerge(segmentInfos, segmentInfos.range(firstSegmentWithDeletions, i)));
       firstSegmentWithDeletions = -1;
     }
   }
@@ -406,7 +431,7 @@
     if (firstSegmentWithDeletions != -1) {
       if (verbose())
         message("  add merge " + firstSegmentWithDeletions + " to " + (numSegments-1) + " inclusive");
-      spec.add(new OneMerge(segmentInfos.range(firstSegmentWithDeletions, numSegments), useCompoundFile));
+      spec.add(makeOneMerge(segmentInfos, segmentInfos.range(firstSegmentWithDeletions, numSegments)));
     }
     return spec;
   }
@@ -506,7 +531,7 @@
           spec = new MergeSpecification();
         if (verbose())
           message("    " + start + " to " + end + ": add this merge");
-        spec.add(new OneMerge(infos.range(start, end), useCompoundFile));
+        spec.add(makeOneMerge(infos, infos.range(start, end)));
       } else if (verbose())
         message("    " + start + " to " + end + ": contains segment over maxMergeSize or maxMergeDocs; skipping");
 
@@ -520,6 +545,29 @@
     return spec;
   }
 
+  protected OneMerge makeOneMerge(SegmentInfos infos, SegmentInfos infosToMerge) throws IOException {
+    final boolean doCFS;
+    if (!useCompoundFile) {
+      doCFS = false;
+    } else if (noCFSRatio == 1.0) {
+      doCFS = true;
+    } else {
+
+      long totSize = 0;
+      for(SegmentInfo info : infos) {
+        totSize += size(info);
+      }
+      long mergeSize = 0;
+      for(SegmentInfo info : infosToMerge) {
+        mergeSize += size(info);
+      }
+
+      doCFS = mergeSize <= noCFSRatio * totSize;
+    }
+
+    return new OneMerge(infosToMerge, doCFS);
+  }
+
   /** <p>Determines the largest segment (measured by
    * document count) that may be merged with other segments.
    * Small values (e.g., less than 10,000) are best for
Index: lucene/src/test/org/apache/lucene/index/TestIndexWriter.java
--- lucene/src/test/org/apache/lucene/index/TestIndexWriter.java	Sat Nov 20 09:40:54 2010 -0500
+++ lucene/src/test/org/apache/lucene/index/TestIndexWriter.java	Sat Nov 20 10:59:12 2010 -0500
@@ -2595,13 +2595,13 @@
     files = Arrays.asList(dir.listAll());
     assertTrue(files.contains("_0.cfs"));
     // optimize created this
-    assertTrue(files.contains("_2.cfs"));
+    //assertTrue(files.contains("_2.cfs"));
     w.deleteUnusedFiles();
 
     files = Arrays.asList(dir.listAll());
     // r still holds this file open
     assertTrue(files.contains("_0.cfs"));
-    assertTrue(files.contains("_2.cfs"));
+    //assertTrue(files.contains("_2.cfs"));
 
     r.close();
     if (iter == 0) {
@@ -2614,7 +2614,7 @@
       files = Arrays.asList(dir.listAll());
       assertFalse(files.contains("_0.cfs"));
     }
-    assertTrue(files.contains("_2.cfs"));
+    //assertTrue(files.contains("_2.cfs"));
 
     w.close();
     r2.close();
Index: lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java
--- lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java	Sat Nov 20 09:40:54 2010 -0500
+++ lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java	Sat Nov 20 10:59:12 2010 -0500
@@ -989,6 +989,7 @@
     writer = new IndexWriter(dir, newIndexWriterConfig(
         TEST_VERSION_CURRENT, new MockAnalyzer()));
     ((LogMergePolicy) writer.getMergePolicy()).setUseCompoundFile(true);
+    ((LogMergePolicy) writer.getMergePolicy()).setNoCFSRatio(1.0);
 
     // add 100 documents
     for (int i = 0; i < 100; i++) {
Index: lucene/src/test/org/apache/lucene/index/TestIndexWriterMergePolicy.java
--- lucene/src/test/org/apache/lucene/index/TestIndexWriterMergePolicy.java	Sat Nov 20 09:40:54 2010 -0500
+++ lucene/src/test/org/apache/lucene/index/TestIndexWriterMergePolicy.java	Sat Nov 20 10:59:12 2010 -0500
@@ -252,25 +252,5 @@
     if (upperBound * mergeFactor <= maxMergeDocs) {
       assertTrue(numSegments < mergeFactor);
     }
-
-    String[] files = writer.getDirectory().listAll();
-    int segmentCfsCount = 0;
-    for (int i = 0; i < files.length; i++) {
-      if (files[i].endsWith(".cfs")) {
-        segmentCfsCount++;
-      }
-    }
-    assertEquals("index=" + writer.segString(), segmentCount, segmentCfsCount);
   }
-
-  /*
-  private void printSegmentDocCounts(IndexWriter writer) {
-    int segmentCount = writer.getSegmentCount();
-    System.out.println("" + segmentCount + " segments total");
-    for (int i = 0; i < segmentCount; i++) {
-      System.out.println("  segment " + i + " has " + writer.getDocCount(i)
-          + " docs");
-    }
-  }
-  */
 }
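Note: the heart of this patch is the doCFS decision in the new LogMergePolicy.makeOneMerge above: a merge is written in compound-file format only when compound files are enabled and the merge's total size is at most noCFSRatio (default 0.1) of the whole index. The sketch below restates that decision as a standalone, runnable Java program; the NoCFSRatioDemo class, the shouldUseCompoundFile helper, and the long[] size arrays are illustrative stand-ins for this note, not APIs from the patch.

// Standalone sketch of the noCFSRatio decision introduced by this patch.
// NoCFSRatioDemo and shouldUseCompoundFile are hypothetical names; segment
// sizes are passed in as plain longs instead of SegmentInfos.
public class NoCFSRatioDemo {

  static boolean shouldUseCompoundFile(long[] indexSegmentSizes,
                                       long[] mergeSegmentSizes,
                                       boolean useCompoundFile,
                                       double noCFSRatio) {
    if (!useCompoundFile) {
      // Compound files are globally disabled.
      return false;
    }
    if (noCFSRatio == 1.0) {
      // Per setNoCFSRatio's javadoc, 1.0 means "always use CFS".
      return true;
    }
    long totSize = 0;
    for (long size : indexSegmentSizes) {
      totSize += size;      // total size of the index
    }
    long mergeSize = 0;
    for (long size : mergeSegmentSizes) {
      mergeSize += size;    // size of the proposed merge
    }
    // Merges larger than noCFSRatio of the index skip the compound file.
    return mergeSize <= noCFSRatio * totSize;
  }

  public static void main(String[] args) {
    long[] index = { 100, 100, 100, 700 };  // index totals 1000 units
    long[] merge = { 100, 100 };            // merge is 200 units = 20%

    // Default ratio 0.1: 200 > 0.1 * 1000, so no compound file.
    System.out.println(shouldUseCompoundFile(index, merge, true, 0.1));   // false
    // A looser ratio of 0.25 keeps this merge in compound format.
    System.out.println(shouldUseCompoundFile(index, merge, true, 0.25));  // true
  }
}

Callers that depend on every segment being packed into a .cfs file (the behavior the removed assertions in TestIndexWriter and TestIndexWriterMergePolicy used to check) can restore it by setting the ratio to 1.0, exactly as TestIndexWriterExceptions does above:

  ((LogMergePolicy) writer.getMergePolicy()).setNoCFSRatio(1.0);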