Index: lucene/CHANGES.txt
--- lucene/CHANGES.txt	Thu Feb 10 05:03:34 2011 -0500
+++ lucene/CHANGES.txt	Fri Feb 11 10:33:36 2011 -0500
@@ -154,10 +154,10 @@
 * LUCENE-2720: IndexWriter throws IndexFormatTooOldException on open,
   rather than later when e.g. a merge starts.
   (Shai Erera, Mike McCandless, Uwe Schindler)
 
-* LUCENE-1076: The default merge policy is now able to merge
-  non-contiguous segments, which means docIDs no longer necessarily
-  stay "in order".  If this is a problem then you can use either of the
-  LogMergePolicy impls, and call setRequireContiguousMerge(true).
+* LUCENE-1076, LUCENE-XXXX: The default merge policy
+  (TieredMergePolicy) is now able to merge non-contiguous segments,
+  which means docIDs no longer necessarily stay "in order".  If this is
+  a problem then you can use either of the LogMergePolicy impls.
   (Mike McCandless)
 
 API Changes
@@ -317,6 +317,11 @@
 * LUCENE-2862: Added TermsEnum.totalTermFreq() and
   Terms.getSumTotalTermFreq().  (Mike McCandless, Robert Muir)
 
+* LUCENE-XXXX: Added a new merge policy, TieredMergePolicy, as the
+  default merge policy.  This policy improves on certain limitations
+  of LogByteSize/DocMergePolicy, but is allowed to merge segments out
+  of order.  (Mike McCandless)
+
 Optimizations
 
 * LUCENE-2588: Don't store unnecessary suffixes when writing the terms
Index: lucene/MIGRATE.txt
--- lucene/MIGRATE.txt	Thu Feb 10 05:03:34 2011 -0500
+++ lucene/MIGRATE.txt	Fri Feb 11 10:33:36 2011 -0500
@@ -337,3 +337,9 @@
 Similarity can now be configured on a per-field basis.  Similarity retains
 only the field-specific relevance methods such as tf() and idf().  Methods
 that apply to the entire query such as coord() and queryNorm() exist in
 SimilarityProvider.
+
+* LUCENE-XXXX: TieredMergePolicy is now the default merge policy.
+  It's able to merge non-contiguous segments; this may cause problems
+  for applications that rely on Lucene's internal document ID
+  assignment.  If so, you should use LogByteSize/DocMergePolicy during
+  indexing.
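The MIGRATE.txt note above is terse, so here is a minimal sketch of the opt-out it
describes: an application that depends on Lucene's internal docID order can install
one of the LogMergePolicy impls on its IndexWriterConfig.  The WhitespaceAnalyzer
and RAMDirectory below are illustrative placeholders, not part of this patch.

    import org.apache.lucene.analysis.WhitespaceAnalyzer;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.index.LogByteSizeMergePolicy;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.RAMDirectory;
    import org.apache.lucene.util.Version;

    public class KeepDocIDOrder {
      public static void main(String[] args) throws Exception {
        Directory dir = new RAMDirectory();
        IndexWriterConfig conf = new IndexWriterConfig(
            Version.LUCENE_CURRENT, new WhitespaceAnalyzer(Version.LUCENE_CURRENT));
        // TieredMergePolicy (the new default) may merge non-contiguous
        // segments; LogByteSizeMergePolicy merges only adjacent segments,
        // so docIDs keep their relative order across merges.
        conf.setMergePolicy(new LogByteSizeMergePolicy());
        IndexWriter writer = new IndexWriter(dir, conf);
        // ... add documents ...
        writer.close();
        dir.close();
      }
    }

Either LogByteSizeMergePolicy or LogDocMergePolicy works here; with this patch both
always merge adjacent segments only, so relative docID order is preserved.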
Index: lucene/contrib/ant/src/java/org/apache/lucene/ant/IndexTask.java
--- lucene/contrib/ant/src/java/org/apache/lucene/ant/IndexTask.java	Thu Feb 10 05:03:34 2011 -0500
+++ lucene/contrib/ant/src/java/org/apache/lucene/ant/IndexTask.java	Fri Feb 11 10:33:36 2011 -0500
@@ -39,7 +39,7 @@
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
-import org.apache.lucene.index.LogMergePolicy;
+import org.apache.lucene.index.TieredMergePolicy;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
 import org.apache.lucene.search.IndexSearcher;
@@ -285,9 +285,9 @@
     IndexWriterConfig conf = new IndexWriterConfig(
         Version.LUCENE_CURRENT, analyzer).setOpenMode(
         create ? OpenMode.CREATE : OpenMode.APPEND);
-    LogMergePolicy lmp = (LogMergePolicy) conf.getMergePolicy();
-    lmp.setUseCompoundFile(useCompoundIndex);
-    lmp.setMergeFactor(mergeFactor);
+    TieredMergePolicy tmp = (TieredMergePolicy) conf.getMergePolicy();
+    tmp.setUseCompoundFile(useCompoundIndex);
+    tmp.setMaxMergeAtOnce(mergeFactor);
     IndexWriter writer = new IndexWriter(dir, conf);
     int totalFiles = 0;
     int totalIndexed = 0;
Index: lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java
--- lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java	Thu Feb 10 05:03:34 2011 -0500
+++ lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java	Fri Feb 11 10:33:36 2011 -0500
@@ -65,7 +65,7 @@
     // create dir data
     IndexWriter indexWriter = new IndexWriter(dir, newIndexWriterConfig(
-        TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy()));
+        TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newLogMergePolicy()));
 
     for (int i = 0; i < 20; i++) {
       Document document = new Document();
@@ -91,7 +91,7 @@
     // create dir data
     IndexWriter indexWriter = new IndexWriter(dir, newIndexWriterConfig(
-        TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy()));
+        TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newLogMergePolicy()));
     indexWriter.setInfoStream(VERBOSE ? System.out : null);
     if (VERBOSE) {
       System.out.println("TEST: make test index");
Index: lucene/contrib/misc/src/test/org/apache/lucene/index/TestFieldNormModifier.java
--- lucene/contrib/misc/src/test/org/apache/lucene/index/TestFieldNormModifier.java	Thu Feb 10 05:03:34 2011 -0500
+++ lucene/contrib/misc/src/test/org/apache/lucene/index/TestFieldNormModifier.java	Fri Feb 11 10:33:36 2011 -0500
@@ -54,7 +54,7 @@
     super.setUp();
     store = newDirectory();
     IndexWriter writer = new IndexWriter(store, newIndexWriterConfig(
-        TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy()));
+        TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newLogMergePolicy()));
 
     for (int i = 0; i < NUM_DOCS; i++) {
       Document d = new Document();
Index: lucene/contrib/misc/src/test/org/apache/lucene/index/TestMultiPassIndexSplitter.java
--- lucene/contrib/misc/src/test/org/apache/lucene/index/TestMultiPassIndexSplitter.java	Thu Feb 10 05:03:34 2011 -0500
+++ lucene/contrib/misc/src/test/org/apache/lucene/index/TestMultiPassIndexSplitter.java	Fri Feb 11 10:33:36 2011 -0500
@@ -32,7 +32,7 @@
   public void setUp() throws Exception {
     super.setUp();
     dir = newDirectory();
-    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy()));
+    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newLogMergePolicy()));
     Document doc;
     for (int i = 0; i < NUM_DOCS; i++) {
       doc = new Document();
Index: lucene/contrib/misc/src/test/org/apache/lucene/index/codecs/appending/TestAppendingCodec.java
--- lucene/contrib/misc/src/test/org/apache/lucene/index/codecs/appending/TestAppendingCodec.java	Thu Feb 10 05:03:34 2011 -0500
+++ lucene/contrib/misc/src/test/org/apache/lucene/index/codecs/appending/TestAppendingCodec.java	Fri Feb 11 10:33:36 2011 -0500
@@ -30,7 +30,7 @@
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
-import org.apache.lucene.index.LogMergePolicy;
+import org.apache.lucene.index.TieredMergePolicy; import org.apache.lucene.index.MultiFields; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; @@ -137,7 +137,7 @@ IndexWriterConfig cfg = new IndexWriterConfig(Version.LUCENE_40, new MockAnalyzer()); cfg.setCodecProvider(new AppendingCodecProvider()); - ((LogMergePolicy)cfg.getMergePolicy()).setUseCompoundFile(false); + ((TieredMergePolicy)cfg.getMergePolicy()).setUseCompoundFile(false); IndexWriter writer = new IndexWriter(dir, cfg); Document doc = new Document(); doc.add(newField("f", text, Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS)); Index: lucene/contrib/misc/src/test/org/apache/lucene/misc/TestLengthNormModifier.java --- lucene/contrib/misc/src/test/org/apache/lucene/misc/TestLengthNormModifier.java Thu Feb 10 05:03:34 2011 -0500 +++ lucene/contrib/misc/src/test/org/apache/lucene/misc/TestLengthNormModifier.java Fri Feb 11 10:33:36 2011 -0500 @@ -59,7 +59,7 @@ super.setUp(); store = newDirectory(); IndexWriter writer = new IndexWriter(store, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy())); + TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newLogMergePolicy())); for (int i = 0; i < NUM_DOCS; i++) { Document d = new Document(); Index: lucene/contrib/queries/src/test/org/apache/lucene/search/DuplicateFilterTest.java --- lucene/contrib/queries/src/test/org/apache/lucene/search/DuplicateFilterTest.java Thu Feb 10 05:03:34 2011 -0500 +++ lucene/contrib/queries/src/test/org/apache/lucene/search/DuplicateFilterTest.java Fri Feb 11 10:33:36 2011 -0500 @@ -43,7 +43,7 @@ public void setUp() throws Exception { super.setUp(); directory = newDirectory(); - RandomIndexWriter writer = new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy())); + RandomIndexWriter writer = new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newLogMergePolicy())); //Add series of docs with filterable fields : url, text and dates flags addDoc(writer, "http://lucene.apache.org", "lucene 1.4.3 available", "20040101"); Index: lucene/contrib/queries/src/test/org/apache/lucene/search/FuzzyLikeThisQueryTest.java --- lucene/contrib/queries/src/test/org/apache/lucene/search/FuzzyLikeThisQueryTest.java Thu Feb 10 05:03:34 2011 -0500 +++ lucene/contrib/queries/src/test/org/apache/lucene/search/FuzzyLikeThisQueryTest.java Fri Feb 11 10:33:36 2011 -0500 @@ -40,7 +40,7 @@ public void setUp() throws Exception { super.setUp(); directory = newDirectory(); - RandomIndexWriter writer = new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy())); + RandomIndexWriter writer = new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newLogMergePolicy())); //Add series of docs with misspelt names addDoc(writer, "jonathon smythe","1"); Index: lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java --- lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java Thu Feb 10 05:03:34 2011 -0500 +++ lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java Fri Feb 11 10:33:36 2011 -0500 @@ -29,7 +29,7 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import 
org.apache.lucene.index.IndexWriterConfig; -import org.apache.lucene.index.LogMergePolicy; +import org.apache.lucene.index.TieredMergePolicy; import org.apache.lucene.index.Term; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.index.Terms; @@ -45,7 +45,6 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.ReaderUtil; import org.apache.lucene.util.Version; -import org.apache.lucene.util.VirtualMethod; /** *

@@ -508,7 +507,7 @@ ensureOpen(); final Directory dir = this.spellIndex; final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Version.LUCENE_CURRENT, new WhitespaceAnalyzer(Version.LUCENE_CURRENT)).setRAMBufferSizeMB(ramMB)); - ((LogMergePolicy) writer.getConfig().getMergePolicy()).setMergeFactor(mergeFactor); + ((TieredMergePolicy) writer.getConfig().getMergePolicy()).setMaxMergeAtOnce(mergeFactor); IndexSearcher indexSearcher = obtainSearcher(); final List termsEnums = new ArrayList(); Index: lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/Syns2Index.java --- lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/Syns2Index.java Thu Feb 10 05:03:34 2011 -0500 +++ lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/Syns2Index.java Fri Feb 11 10:33:36 2011 -0500 @@ -36,7 +36,7 @@ import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; -import org.apache.lucene.index.LogMergePolicy; +import org.apache.lucene.index.TieredMergePolicy; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; @@ -250,7 +250,7 @@ // override the specific index if it already exists IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig( Version.LUCENE_CURRENT, ana).setOpenMode(OpenMode.CREATE)); - ((LogMergePolicy) writer.getConfig().getMergePolicy()).setUseCompoundFile(true); // why? + ((TieredMergePolicy) writer.getConfig().getMergePolicy()).setUseCompoundFile(true); // why? Iterator i1 = word2Nums.keySet().iterator(); while (i1.hasNext()) // for each word { Index: lucene/contrib/wordnet/src/test/org/apache/lucene/wordnet/TestWordnet.java --- lucene/contrib/wordnet/src/test/org/apache/lucene/wordnet/TestWordnet.java Thu Feb 10 05:03:34 2011 -0500 +++ lucene/contrib/wordnet/src/test/org/apache/lucene/wordnet/TestWordnet.java Fri Feb 11 10:33:36 2011 -0500 @@ -29,6 +29,7 @@ import org.apache.lucene.search.TermQuery; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; public class TestWordnet extends LuceneTestCase { private IndexSearcher searcher; @@ -42,6 +43,7 @@ // create a temporary synonym index File testFile = getDataFile("testSynonyms.txt"); String commandLineArgs[] = { testFile.getAbsolutePath(), storePathName }; + _TestUtil.rmDir(new File(storePathName)); try { Syns2Index.main(commandLineArgs); @@ -71,8 +73,12 @@ @Override public void tearDown() throws Exception { - searcher.close(); - dir.close(); + if (searcher != null) { + searcher.close(); + } + if (dir != null) { + dir.close(); + } rmDir(storePathName); // delete our temporary synonym index super.tearDown(); } Index: lucene/src/java/org/apache/lucene/index/IndexWriter.java --- lucene/src/java/org/apache/lucene/index/IndexWriter.java Thu Feb 10 05:03:34 2011 -0500 +++ lucene/src/java/org/apache/lucene/index/IndexWriter.java Fri Feb 11 10:33:36 2011 -0500 @@ -920,7 +920,7 @@ * message when maxFieldLength is reached will be printed * to this. 
*/ - public void setInfoStream(PrintStream infoStream) { + public void setInfoStream(PrintStream infoStream) throws IOException { ensureOpen(); this.infoStream = infoStream; docWriter.setInfoStream(infoStream); @@ -930,7 +930,7 @@ messageState(); } - private void messageState() { + private void messageState() throws IOException { message("\ndir=" + directory + "\n" + "index=" + segString() + "\n" + "version=" + Constants.LUCENE_VERSION + "\n" + @@ -1684,6 +1684,8 @@ throws CorruptIndexException, IOException { ensureOpen(); + flush(true, true); + if (infoStream != null) message("expungeDeletes: index now " + segString()); @@ -1756,6 +1758,10 @@ * documents, so you must do so yourself if necessary. * See also {@link #expungeDeletes(boolean)} * + *

NOTE: this method first flushes a new + * segment (if there are indexed documents), and applies + * all buffered deletes. + * *

NOTE: if this method hits an OutOfMemoryError * you should immediately close the writer. See above for details.
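A hedged sketch of the behavior the NOTE above documents: a delete that is still
buffered when expungeDeletes() is called gets flushed and applied by the call
itself.  The field and term names here are hypothetical.

    import org.apache.lucene.analysis.WhitespaceAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.store.RAMDirectory;
    import org.apache.lucene.util.Version;

    public class ExpungeBufferedDeletes {
      public static void main(String[] args) throws Exception {
        RAMDirectory dir = new RAMDirectory();
        IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(
            Version.LUCENE_CURRENT, new WhitespaceAnalyzer(Version.LUCENE_CURRENT)));
        Document doc = new Document();
        doc.add(new Field("id", "1", Field.Store.NO, Field.Index.NOT_ANALYZED));
        w.addDocument(doc);
        w.commit();
        w.deleteDocuments(new Term("id", "1"));  // delete is still buffered here
        w.expungeDeletes();  // now flushes first, so the buffered delete is reclaimed
        w.close();
        dir.close();
      }
    }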

@@ -2598,7 +2604,7 @@ return docWriter.getNumDocs(); } - private void ensureValidMerge(MergePolicy.OneMerge merge) { + private void ensureValidMerge(MergePolicy.OneMerge merge) throws IOException { for(SegmentInfo info : merge.segments) { if (segmentInfos.indexOf(info) == -1) { throw new MergePolicy.MergeException("MergePolicy selected a segment (" + info.name + ") that is not in the current index " + segString(), directory); @@ -2870,7 +2876,7 @@ * are now participating in a merge, and true is * returned. Else (the merge conflicts) false is * returned. */ - final synchronized boolean registerMerge(MergePolicy.OneMerge merge) throws MergePolicy.MergeAbortedException { + final synchronized boolean registerMerge(MergePolicy.OneMerge merge) throws MergePolicy.MergeAbortedException, IOException { if (merge.registerDone) return true; @@ -2880,10 +2886,8 @@ throw new MergePolicy.MergeAbortedException("merge is aborted: " + merge.segString(directory)); } - final int count = merge.segments.size(); boolean isExternal = false; - for(int i=0;i 0 ? segmentInfos.info(segmentInfos.size()-1) : null; } - public synchronized String segString() { + /** @lucene.internal */ + public synchronized String segString() throws IOException { return segString(segmentInfos); } - private synchronized String segString(SegmentInfos infos) { + /** @lucene.internal */ + public synchronized String segString(SegmentInfos infos) throws IOException { StringBuilder buffer = new StringBuilder(); final int count = infos.size(); for(int i = 0; i < count; i++) { if (i > 0) { buffer.append(' '); } - final SegmentInfo info = infos.info(i); - buffer.append(info.toString(directory, 0)); - if (info.dir != directory) - buffer.append("**"); + buffer.append(segString(infos.info(i))); + } + + return buffer.toString(); + } + + public synchronized String segString(SegmentInfo info) throws IOException { + StringBuilder buffer = new StringBuilder(); + SegmentReader reader = readerPool.getIfExists(info); + try { + if (reader != null) { + buffer.append(reader.toString()); + } else { + buffer.append(info.toString(directory, 0)); + if (info.dir != directory) { + buffer.append("**"); + } + } + } finally { + if (reader != null) { + readerPool.release(reader); + } } return buffer.toString(); } Index: lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java --- lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java Thu Feb 10 05:03:34 2011 -0500 +++ lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java Fri Feb 11 10:33:36 2011 -0500 @@ -153,7 +153,7 @@ indexingChain = DocumentsWriter.defaultIndexingChain; mergedSegmentWarmer = null; codecProvider = CodecProvider.getDefault(); - mergePolicy = new LogByteSizeMergePolicy(); + mergePolicy = new TieredMergePolicy(); maxThreadStates = DEFAULT_MAX_THREAD_STATES; readerPooling = DEFAULT_READER_POOLING; readerTermsIndexDivisor = DEFAULT_READER_TERMS_INDEX_DIVISOR; Index: lucene/src/java/org/apache/lucene/index/LogMergePolicy.java --- lucene/src/java/org/apache/lucene/index/LogMergePolicy.java Thu Feb 10 05:03:34 2011 -0500 +++ lucene/src/java/org/apache/lucene/index/LogMergePolicy.java Fri Feb 11 10:33:36 2011 -0500 @@ -20,7 +20,6 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Collection; -import java.util.Collections; import java.util.Comparator; import java.util.List; import java.util.Set; @@ -72,7 +71,6 @@ // out there wrote his own LMP ... 
protected long maxMergeSizeForOptimize = Long.MAX_VALUE; protected int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS; - protected boolean requireContiguousMerge = false; protected double noCFSRatio = DEFAULT_NO_CFS_RATIO; @@ -111,21 +109,6 @@ writer.get().message("LMP: " + message); } - /** If true, merges must be in-order slice of the - * segments. If false, then the merge policy is free to - * pick any segments. The default is false, which is - * in general more efficient than true since it gives the - * merge policy more freedom to pick closely sized - * segments. */ - public void setRequireContiguousMerge(boolean v) { - requireContiguousMerge = v; - } - - /** See {@link #setRequireContiguousMerge}. */ - public boolean getRequireContiguousMerge() { - return requireContiguousMerge; - } - /**

Returns the number of segments that are merged at * once and also controls the total number of segments * allowed to accumulate in the index.

   */
@@ -378,8 +361,6 @@
     return null;
   }
 
-  // TODO: handle non-contiguous merge case differently?
-
   // Find the newest (rightmost) segment that needs to
   // be optimized (other segments may have been flushed
   // since optimize started):
@@ -499,14 +480,6 @@
     }
   }
 
-  private static class SortByIndex implements Comparator<SegmentInfoAndLevel> {
-    public int compare(SegmentInfoAndLevel o1, SegmentInfoAndLevel o2) {
-      return o1.index - o2.index;
-    }
-  }
-
-  private static final SortByIndex sortByIndex = new SortByIndex();
-
   /** Checks if any merges are now necessary and returns a
    *  {@link MergePolicy.MergeSpecification} if so.  A merge
    *  is necessary when there are more than {@link
@@ -532,31 +505,24 @@
       final SegmentInfo info = infos.info(i);
       long size = size(info);
 
-      // When we require contiguous merge, we still add the
-      // segment to levels to avoid merging "across" a set
-      // of segment being merged:
-      if (!requireContiguousMerge && mergingSegments.contains(info)) {
-        if (verbose()) {
-          message("seg " + info.name + " already being merged; skip");
-        }
-        continue;
-      }
-
       // Floor tiny segments
       if (size < 1) {
         size = 1;
       }
+
       final SegmentInfoAndLevel infoLevel = new SegmentInfoAndLevel(info, (float) Math.log(size)/norm, i);
       levels.add(infoLevel);
+
       if (verbose()) {
-        message("seg " + info.name + " level=" + infoLevel.level + " size=" + size);
+        final long segBytes = sizeBytes(info);
+        String extra = mergingSegments.contains(info) ? " [merging]" : "";
+        if (size >= maxMergeSize) {
+          extra += " [skip: too large]";
+        }
+        message("seg=" + writer.get().segString(info) + " level=" + infoLevel.level + " size=" + String.format("%.3f MB", segBytes/1024/1024.) + extra);
       }
     }
 
-    if (!requireContiguousMerge) {
-      Collections.sort(levels);
-    }
-
     final float levelFloor;
     if (minMergeSize <= 0)
       levelFloor = (float) 0.0;
@@ -614,23 +580,29 @@
         int end = start + mergeFactor;
         while(end <= 1+upto) {
           boolean anyTooLarge = false;
+          boolean anyMerging = false;
           for(int i=start;i<end;i++) {
             final SegmentInfo info = levels.get(i).info;
             anyTooLarge |= (size(info) >= maxMergeSize || sizeDocs(info) >= maxMergeDocs);
+            if (mergingSegments.contains(info)) {
+              anyMerging = true;
+              break;
+            }
           }
 
-          if (!anyTooLarge) {
+          if (anyMerging) {
+            // skip
+          } else if (!anyTooLarge) {
             if (spec == null)
               spec = new MergeSpecification();
-            if (verbose()) {
-              message("  " + start + " to " + end + ": add this merge");
-            }
-            Collections.sort(levels.subList(start, end), sortByIndex);
             final SegmentInfos mergeInfos = new SegmentInfos();
             for(int i=start;i
Index: lucene/src/java/org/apache/lucene/index/TieredMergePolicy.java
 
+/** For normal merging, this policy first computes a
+ *  "budget" of how many segments are allowed to be in the
+ *  index.  If the index is over-budget, then the policy
+ *  sorts segments by decreasing size (pro-rating by percent
+ *  deletes), and then finds the least-cost merge.  Merge
+ *  cost is measured by a combination of the "skew" of the
+ *  merge (size of largest seg divided by smallest seg),
+ *  total merge size and pct deletes reclaimed,
+ *  so that merges with lower skew, smaller size
+ *  and those reclaiming more deletes, are
+ *  favored.
+ *
+ *

If a merge will produce a segment that's larger than + * {@link #setMaxMergedSegmentMB}, then the policy will + * merge fewer segments (down to 1 at once, if that one has + * deletions) to keep the segment size under budget. + * + * NOTE: this policy freely merges non-adjacent + * segments; if this is a problem, use {@link + * LogMergePolicy}. + * + *

+ *  NOTE: This policy always merges by byte size
+ *  of the segments, always pro-rates by percent deletes,
+ *  and does not apply any maximum segment size during
+ *  optimize (unlike {@link LogByteSizeMergePolicy}).
+ *
+ *  @lucene.experimental
+ */
+
+// TODO
+//   - we could try to take into account whether a large
+//     merge is already running (under CMS) and then bias
+//     ourselves towards picking smaller merges if so (or,
+//     maybe CMS should do so)
+
+public class TieredMergePolicy extends MergePolicy {
+
+  private int maxMergeAtOnce = 10;
+  private long maxMergedSegmentBytes = 5*1024*1024*1024L;
+  private int maxMergeAtOnceExplicit = 30;
+
+  private long floorSegmentBytes = 2*1024*1024L;
+  private double segsPerTier = 10.0;
+  private double expungeDeletesPctAllowed = 10.0;
+  private boolean useCompoundFile = true;
+  private double noCFSRatio = 0.1;
+
+  /** Maximum number of segments to be merged at a time
+   *  during "normal" merging.  For explicit merging (eg,
+   *  optimize or expungeDeletes was called), see {@link
+   *  #setMaxMergeAtOnceExplicit}.  Default is 10. */
+  public TieredMergePolicy setMaxMergeAtOnce(int v) {
+    if (v < 2) {
+      throw new IllegalArgumentException("maxMergeAtOnce must be > 1 (got " + v + ")");
+    }
+    maxMergeAtOnce = v;
+    return this;
+  }
+
+  /** @see #setMaxMergeAtOnce */
+  public int getMaxMergeAtOnce() {
+    return maxMergeAtOnce;
+  }
+
+  // TODO: should addIndexes do explicit merging, too?  And,
+  // if user calls IW.maybeMerge "explicitly"
+
+  /** Maximum number of segments to be merged at a time,
+   *  during optimize or expungeDeletes.  Default is 30. */
+  public TieredMergePolicy setMaxMergeAtOnceExplicit(int v) {
+    if (v < 2) {
+      throw new IllegalArgumentException("maxMergeAtOnceExplicit must be > 1 (got " + v + ")");
+    }
+    maxMergeAtOnceExplicit = v;
+    return this;
+  }
+
+  /** @see #setMaxMergeAtOnceExplicit */
+  public int getMaxMergeAtOnceExplicit() {
+    return maxMergeAtOnceExplicit;
+  }
+
+  /** Maximum sized segment to produce during
+   *  normal merging.  This setting is approximate: the
+   *  estimate of the merged segment size is made by summing
+   *  sizes of to-be-merged segments (compensating for
+   *  percent deleted docs).  Default is 5 GB. */
+  public TieredMergePolicy setMaxMergedSegmentMB(double v) {
+    maxMergedSegmentBytes = (long) (v*1024*1024);
+    return this;
+  }
+
+  /** @see #setMaxMergedSegmentMB */
+  public double getMaxMergedSegmentMB() {
+    return maxMergedSegmentBytes/1024/1024.;
+  }
+
+  /** Segments smaller than this are "rounded up" to this
+   *  size, ie treated as equal (floor) size for merge
+   *  selection.  This is to prevent frequent flushing of
+   *  tiny segments from allowing a long tail in the index.
+   *  Default is 2 MB. */
+  public TieredMergePolicy setFloorSegmentMB(double v) {
+    if (v <= 0.0) {
+      throw new IllegalArgumentException("floorSegmentMB must be > 0.0 (got " + v + ")");
+    }
+    floorSegmentBytes = (long) (v*1024*1024);
+    return this;
+  }
+
+  /** @see #setFloorSegmentMB */
+  public double getFloorSegmentMB() {
+    return floorSegmentBytes/1024/1024.;
+  }
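Because every setter above returns this, a TieredMergePolicy can be configured
fluently and handed to an IndexWriterConfig.  A brief sketch with illustrative
values (not tuning advice):

    import org.apache.lucene.index.TieredMergePolicy;

    public class TieredMergePolicyConfig {
      public static void main(String[] args) {
        TieredMergePolicy tmp = new TieredMergePolicy()
            .setMaxMergeAtOnce(10)            // width of a "normal" merge
            .setMaxMergeAtOnceExplicit(30)    // width during optimize/expungeDeletes
            .setMaxMergedSegmentMB(5 * 1024)  // ~5 GB cap on merged segments
            .setFloorSegmentMB(2.0)           // tiny segments count as 2 MB
            .setSegmentsPerTier(10.0);        // allowed segments per tier
        // toString() (defined later in this file) echoes the settings:
        System.out.println(tmp);
      }
    }

The configured policy is then installed with IndexWriterConfig.setMergePolicy(tmp).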
+
+  /** When expungeDeletes is called, we only merge away a
+   *  segment if its delete percentage is over this
+   *  threshold.  Default is 10%. */
+  public TieredMergePolicy setExpungeDeletesPctAllowed(double v) {
+    if (v < 0.0 || v > 100.0) {
+      throw new IllegalArgumentException("expungeDeletesPctAllowed must be between 0.0 and 100.0 inclusive (got " + v + ")");
+    }
+    expungeDeletesPctAllowed = v;
+    return this;
+  }
+
+  /** @see #setExpungeDeletesPctAllowed */
+  public double getExpungeDeletesPctAllowed() {
+    return expungeDeletesPctAllowed;
+  }
+
+  /** Sets the allowed number of segments per tier.  Smaller
+   *  values mean more merging but fewer segments.  This should
+   *  be >= {@link #setMaxMergeAtOnce} otherwise you'll hit
+   *  excessive merging.  Default is 10.0. */
+  public TieredMergePolicy setSegmentsPerTier(double v) {
+    if (v < 2.0) {
+      throw new IllegalArgumentException("segmentsPerTier must be >= 2.0 (got " + v + ")");
+    }
+    segsPerTier = v;
+    return this;
+  }
+
+  /** @see #setSegmentsPerTier */
+  public double getSegmentsPerTier() {
+    return segsPerTier;
+  }
+
+  /** Sets whether compound file format should be used for
+   *  newly flushed and newly merged segments.  Default
+   *  true. */
+  public TieredMergePolicy setUseCompoundFile(boolean useCompoundFile) {
+    this.useCompoundFile = useCompoundFile;
+    return this;
+  }
+
+  /** @see #setUseCompoundFile */
+  public boolean getUseCompoundFile() {
+    return useCompoundFile;
+  }
+
+  /** If a merged segment will be more than this percentage
+   *  of the total size of the index, leave the segment as
+   *  non-compound file even if compound file is enabled.
+   *  Set to 1.0 to always use CFS regardless of merge
+   *  size.  Default is 0.1. */
+  public TieredMergePolicy setNoCFSRatio(double noCFSRatio) {
+    if (noCFSRatio < 0.0 || noCFSRatio > 1.0) {
+      throw new IllegalArgumentException("noCFSRatio must be 0.0 to 1.0 inclusive; got " + noCFSRatio);
+    }
+    this.noCFSRatio = noCFSRatio;
+    return this;
+  }
+
+  /** @see #setNoCFSRatio */
+  public double getNoCFSRatio() {
+    return noCFSRatio;
+  }
+
+  private class SegmentByteSizeDescending implements Comparator<SegmentInfo> {
+    public int compare(SegmentInfo o1, SegmentInfo o2) {
+      try {
+        final long sz1 = size(o1);
+        final long sz2 = size(o2);
+        if (sz1 > sz2) {
+          return -1;
+        } else if (sz2 > sz1) {
+          return 1;
+        } else {
+          return o1.name.compareTo(o2.name);
+        }
+      } catch (IOException ioe) {
+        throw new RuntimeException(ioe);
+      }
+    }
+  }
+
+  private final Comparator<SegmentInfo> segmentByteSizeDescending = new SegmentByteSizeDescending();
+
+  protected static abstract class MergeScore {
+    abstract double getScore();
+    abstract String getExplanation();
+  }
+
+  @Override
+  public MergeSpecification findMerges(SegmentInfos infos) throws IOException {
+    if (verbose()) {
+      message("findMerges: " + infos.size() + " segments");
+    }
+    if (infos.size() == 0) {
+      return null;
+    }
+    final Collection<SegmentInfo> merging = writer.get().getMergingSegments();
+    final Collection<SegmentInfo> toBeMerged = new HashSet<SegmentInfo>();
+
+    final SegmentInfos infosSorted = new SegmentInfos();
+    infosSorted.addAll(infos);
+
+    Collections.sort(infosSorted, segmentByteSizeDescending);
+
+    // Compute total index bytes & print details about the index
+    long totIndexBytes = 0;
+    long minSegmentBytes = Long.MAX_VALUE;
+    for(SegmentInfo info : infosSorted) {
+      final long segBytes = size(info);
+      if (verbose()) {
+        String extra = merging.contains(info) ? " [merging]" : "";
+        if (segBytes >= maxMergedSegmentBytes/2.0) {
+          extra += " [skip: too large]";
+        } else if (segBytes < floorSegmentBytes) {
+          extra += " [floored]";
+        }
+        message("  seg=" + writer.get().segString(info) + " size=" + String.format("%.3f", segBytes/1024/1024.)
+ " MB" + extra); + } + + minSegmentBytes = Math.min(segBytes, minSegmentBytes); + // Accum total byte size + totIndexBytes += segBytes; + } + + // If we have too-large segments, grace them out + // of the maxSegmentCount: + int tooBigCount = 0; + while (tooBigCount < infosSorted.size() && size(infosSorted.info(tooBigCount)) >= maxMergedSegmentBytes/2.0) { + totIndexBytes -= size(infosSorted.get(tooBigCount)); + tooBigCount++; + } + + minSegmentBytes = floorSize(minSegmentBytes); + + // Compute max allowed segs in the index + long levelSize = minSegmentBytes; + long bytesLeft = totIndexBytes; + double allowedSegCount = 0; + while(true) { + final double segCountLevel = bytesLeft / (double) levelSize; + if (segCountLevel < segsPerTier) { + allowedSegCount += Math.ceil(segCountLevel); + break; + } + allowedSegCount += segsPerTier; + bytesLeft -= segsPerTier * levelSize; + levelSize *= maxMergeAtOnce; + } + int allowedSegCountInt = (int) allowedSegCount; + + MergeSpecification spec = null; + + // Cycle to possibly select more than one merge: + while(true) { + + // Gather eligible segments for merging, ie segments + // not already being merged and not already picked (by + // prior iteration of this loop) for merging: + final SegmentInfos eligible = new SegmentInfos(); + for(int idx = tooBigCount; idx segmentsToOptimize) throws IOException { + if (verbose()) { + message("findMergesForOptimize maxSegmentCount=" + maxSegmentCount + " infos=" + writer.get().segString(infos) + " segmentsToOptimize=" + segmentsToOptimize); + } + SegmentInfos eligible = new SegmentInfos(); + boolean optimizeMergeRunning = false; + final Collection merging = writer.get().getMergingSegments(); + for(SegmentInfo info : infos) { + if (segmentsToOptimize.contains(info)) { + if (!merging.contains(info)) { + eligible.add(info); + } else { + optimizeMergeRunning = true; + } + } + } + + if (eligible.size() == 0) { + return null; + } + + if ((maxSegmentCount > 1 && eligible.size() <= maxSegmentCount) || + (maxSegmentCount == 1 && eligible.size() == 1 && isOptimized(eligible.get(0)))) { + if (verbose()) { + message("already optimized"); + } + return null; + } + + Collections.sort(eligible, segmentByteSizeDescending); + + if (verbose()) { + message("eligible=" + eligible); + message("optimizeMergeRunning=" + optimizeMergeRunning); + } + + int end = eligible.size(); + + MergeSpecification spec = null; + + // Do full merges, first, backwards: + while(end >= maxMergeAtOnceExplicit + maxSegmentCount - 1) { + if (spec == null) { + spec = new MergeSpecification(); + } + final OneMerge merge = new OneMerge(eligible.range(end-maxMergeAtOnceExplicit, end)); + if (verbose()) { + message("add merge=" + writer.get().segString(merge.segments)); + } + spec.add(merge); + end -= maxMergeAtOnceExplicit; + } + + if (spec == null && !optimizeMergeRunning) { + // Do final merge + final int numToMerge = end - maxSegmentCount + 1; + final OneMerge merge = new OneMerge(eligible.range(end-numToMerge, end)); + if (verbose()) { + message("add final merge=" + merge.segString(writer.get().getDirectory())); + } + spec = new MergeSpecification(); + spec.add(merge); + } + + return spec; + } + + @Override + public MergeSpecification findMergesToExpungeDeletes(SegmentInfos infos) + throws CorruptIndexException, IOException { + if (verbose()) { + message("findMergesToExpungeDeletes infos=" + writer.get().segString(infos) + " expungeDeletesPctAllowed=" + expungeDeletesPctAllowed); + } + final SegmentInfos eligible = new SegmentInfos(); + final Collection merging = 
writer.get().getMergingSegments(); + for(SegmentInfo info : infos) { + double pctDeletes = 100.*((double) writer.get().numDeletedDocs(info))/info.docCount; + if (pctDeletes > expungeDeletesPctAllowed && !merging.contains(info)) { + eligible.add(info); + } + } + + if (eligible.size() == 0) { + return null; + } + + Collections.sort(eligible, segmentByteSizeDescending); + + if (verbose()) { + message("eligible=" + eligible); + } + + int start = 0; + MergeSpecification spec = null; + + while(start < eligible.size()) { + long totAfterMergeBytes = 0; + int upto = start; + boolean done = false; + while(upto < start + maxMergeAtOnceExplicit) { + if (upto == eligible.size()) { + done = true; + break; + } + final SegmentInfo info = eligible.get(upto); + final long segBytes = size(info); + if (totAfterMergeBytes + segBytes > maxMergedSegmentBytes) { + // TODO: we could be smarter here, eg cherry + // picking smaller merges that'd sum up to just + // around the max size + break; + } + totAfterMergeBytes += segBytes; + upto++; + } + + if (upto == start) { + // Single segment is too big; grace it + start++; + continue; + } + + if (spec == null) { + spec = new MergeSpecification(); + } + + final OneMerge merge = new OneMerge(eligible.range(start, upto)); + if (verbose()) { + message("add merge=" + writer.get().segString(merge.segments)); + } + spec.add(merge); + start = upto; + if (done) { + break; + } + } + + return spec; + } + + @Override + public boolean useCompoundFile(SegmentInfos infos, SegmentInfo mergedInfo) throws IOException { + final boolean doCFS; + + if (!useCompoundFile) { + doCFS = false; + } else if (noCFSRatio == 1.0) { + doCFS = true; + } else { + long totalSize = 0; + for (SegmentInfo info : infos) + totalSize += size(info); + + doCFS = size(mergedInfo) <= noCFSRatio * totalSize; + } + return doCFS; + } + + @Override + public void close() { + } + + private boolean isOptimized(SegmentInfo info) + throws IOException { + IndexWriter w = writer.get(); + assert w != null; + boolean hasDeletions = w.numDeletedDocs(info) > 0; + return !hasDeletions && + !info.hasSeparateNorms() && + info.dir == w.getDirectory() && + (info.getUseCompoundFile() == useCompoundFile || noCFSRatio < 1.0); + } + + // Segment size in bytes, pro-rated by % deleted + private long size(SegmentInfo info) throws IOException { + final long byteSize = info.sizeInBytes(true); + final int delCount = writer.get().numDeletedDocs(info); + final double delRatio = (info.docCount <= 0 ? 
0.0f : ((double)delCount / (double)info.docCount)); + assert delRatio <= 1.0; + return (long) (byteSize * (1.0-delRatio)); + } + + private long floorSize(long bytes) { + return Math.max(floorSegmentBytes, bytes); + } + + private boolean verbose() { + IndexWriter w = writer.get(); + return w != null && w.verbose(); + } + + private void message(String message) { + if (verbose()) { + writer.get().message("TMP: " + message); + } + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder("[" + getClass().getSimpleName() + ": "); + sb.append("maxMergeAtOnce=").append(maxMergeAtOnce).append(", "); + sb.append("maxMergeAtOnceExplicit=").append(maxMergeAtOnceExplicit).append(", "); + sb.append("maxMergedSegmentMB=").append(maxMergedSegmentBytes/1024/1024.).append(", "); + sb.append("floorSegmentMB=").append(floorSegmentBytes/1024/1024.).append(", "); + sb.append("expungeDeletesPctAllowed=").append(expungeDeletesPctAllowed).append(", "); + sb.append("segmentsPerTier=").append(segsPerTier).append(", "); + sb.append("useCompoundFile=").append(useCompoundFile).append(", "); + sb.append("noCFSRatio=").append(noCFSRatio); + return sb.toString(); + } +} \ No newline at end of file Index: lucene/src/test-framework/org/apache/lucene/util/LineFileDocs.java --- lucene/src/test-framework/org/apache/lucene/util/LineFileDocs.java Thu Feb 10 05:03:34 2011 -0500 +++ lucene/src/test-framework/org/apache/lucene/util/LineFileDocs.java Fri Feb 11 10:33:36 2011 -0500 @@ -128,7 +128,7 @@ body = new Field("body", "", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); doc.add(body); - id = new Field("id", "", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS); + id = new Field("docid", "", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS); doc.add(id); date = new Field("date", "", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS); Index: lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java --- lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java Thu Feb 10 05:03:34 2011 -0500 +++ lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java Fri Feb 11 10:33:36 2011 -0500 @@ -763,9 +763,11 @@ } if (r.nextBoolean()) { + c.setMergePolicy(newTieredMergePolicy()); + } else if (r.nextBoolean()) { + c.setMergePolicy(newLogMergePolicy()); + } else { c.setMergePolicy(new MockRandomMergePolicy(r)); - } else { - c.setMergePolicy(newLogMergePolicy()); } c.setReaderPooling(r.nextBoolean()); @@ -777,6 +779,10 @@ return newLogMergePolicy(random); } + public static TieredMergePolicy newTieredMergePolicy() { + return newTieredMergePolicy(random); + } + public static LogMergePolicy newLogMergePolicy(Random r) { LogMergePolicy logmp = r.nextBoolean() ? 
new LogDocMergePolicy() : new LogByteSizeMergePolicy(); logmp.setUseCompoundFile(r.nextBoolean()); @@ -789,17 +795,22 @@ return logmp; } - public static LogMergePolicy newInOrderLogMergePolicy() { - LogMergePolicy logmp = newLogMergePolicy(); - logmp.setRequireContiguousMerge(true); - return logmp; - } - - public static LogMergePolicy newInOrderLogMergePolicy(int mergeFactor) { - LogMergePolicy logmp = newLogMergePolicy(); - logmp.setMergeFactor(mergeFactor); - logmp.setRequireContiguousMerge(true); - return logmp; + public static TieredMergePolicy newTieredMergePolicy(Random r) { + TieredMergePolicy tmp = new TieredMergePolicy(); + if (r.nextInt(3) == 2) { + tmp.setMaxMergeAtOnce(2); + tmp.setMaxMergeAtOnceExplicit(2); + } else { + tmp.setMaxMergeAtOnce(_TestUtil.nextInt(r, 2, 20)); + tmp.setMaxMergeAtOnceExplicit(_TestUtil.nextInt(r, 2, 30)); + } + tmp.setMaxMergedSegmentMB(0.2 + r.nextDouble() * 2.0); + tmp.setFloorSegmentMB(0.2 + r.nextDouble() * 2.0); + tmp.setExpungeDeletesPctAllowed(0.0 + r.nextDouble() * 30.0); + tmp.setSegmentsPerTier(_TestUtil.nextInt(r, 2, 20)); + tmp.setUseCompoundFile(r.nextBoolean()); + tmp.setNoCFSRatio(0.1 + r.nextDouble()*0.8); + return tmp; } public static LogMergePolicy newLogMergePolicy(boolean useCFS) { Index: lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java --- lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java Thu Feb 10 05:03:34 2011 -0500 +++ lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java Fri Feb 11 10:33:36 2011 -0500 @@ -38,10 +38,13 @@ import org.apache.lucene.index.ConcurrentMergeScheduler; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.LogMergePolicy; +import org.apache.lucene.index.MergePolicy; import org.apache.lucene.index.MergeScheduler; +import org.apache.lucene.index.TieredMergePolicy; import org.apache.lucene.index.codecs.Codec; import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.store.Directory; +import org.junit.Assert; public class _TestUtil { @@ -283,9 +286,14 @@ // count lowish public static void reduceOpenFiles(IndexWriter w) { // keep number of open files lowish - LogMergePolicy lmp = (LogMergePolicy) w.getConfig().getMergePolicy(); - lmp.setMergeFactor(Math.min(5, lmp.getMergeFactor())); - + MergePolicy mp = w.getConfig().getMergePolicy(); + if (mp instanceof LogMergePolicy) { + LogMergePolicy lmp = (LogMergePolicy) mp; + lmp.setMergeFactor(Math.min(5, lmp.getMergeFactor())); + } else if (mp instanceof TieredMergePolicy) { + TieredMergePolicy tmp = (TieredMergePolicy) mp; + tmp.setMaxMergeAtOnce(Math.min(5, tmp.getMaxMergeAtOnce())); + } MergeScheduler ms = w.getConfig().getMergeScheduler(); if (ms instanceof ConcurrentMergeScheduler) { ((ConcurrentMergeScheduler) ms).setMaxThreadCount(2); Index: lucene/src/test/org/apache/lucene/index/TestAddIndexes.java --- lucene/src/test/org/apache/lucene/index/TestAddIndexes.java Thu Feb 10 05:03:34 2011 -0500 +++ lucene/src/test/org/apache/lucene/index/TestAddIndexes.java Fri Feb 11 10:33:36 2011 -0500 @@ -1039,8 +1039,9 @@ IndexReader[] readers = new IndexReader[] { IndexReader.open(dirs[0]), IndexReader.open(dirs[1]) }; Directory dir = new RAMDirectory(); - IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()); + IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newLogMergePolicy()); LogMergePolicy lmp = (LogMergePolicy) conf.getMergePolicy(); + lmp.setUseCompoundFile(true); lmp.setNoCFSRatio(1.0); // 
Force creation of CFS IndexWriter w3 = new IndexWriter(dir, conf); w3.addIndexes(readers); Index: lucene/src/test/org/apache/lucene/index/TestAtomicUpdate.java --- lucene/src/test/org/apache/lucene/index/TestAtomicUpdate.java Thu Feb 10 05:03:34 2011 -0500 +++ lucene/src/test/org/apache/lucene/index/TestAtomicUpdate.java Fri Feb 11 10:33:36 2011 -0500 @@ -129,7 +129,7 @@ IndexWriterConfig conf = new IndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()) .setMaxBufferedDocs(7); - ((LogMergePolicy) conf.getMergePolicy()).setMergeFactor(3); + ((TieredMergePolicy) conf.getMergePolicy()).setMaxMergeAtOnce(3); IndexWriter writer = new MockIndexWriter(directory, conf); writer.setInfoStream(VERBOSE ? System.out : null); Index: lucene/src/test/org/apache/lucene/index/TestDeletionPolicy.java --- lucene/src/test/org/apache/lucene/index/TestDeletionPolicy.java Thu Feb 10 05:03:34 2011 -0500 +++ lucene/src/test/org/apache/lucene/index/TestDeletionPolicy.java Fri Feb 11 10:33:36 2011 -0500 @@ -619,7 +619,7 @@ Directory dir = newDirectory(); IndexWriterConfig conf = newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()) - .setOpenMode(OpenMode.CREATE).setIndexDeletionPolicy(policy).setMergePolicy(newInOrderLogMergePolicy()); + .setOpenMode(OpenMode.CREATE).setIndexDeletionPolicy(policy).setMergePolicy(newLogMergePolicy()); MergePolicy mp = conf.getMergePolicy(); if (mp instanceof LogMergePolicy) { ((LogMergePolicy) mp).setUseCompoundFile(useCompoundFile); Index: lucene/src/test/org/apache/lucene/index/TestDocsAndPositions.java --- lucene/src/test/org/apache/lucene/index/TestDocsAndPositions.java Thu Feb 10 05:03:34 2011 -0500 +++ lucene/src/test/org/apache/lucene/index/TestDocsAndPositions.java Fri Feb 11 10:33:36 2011 -0500 @@ -116,7 +116,7 @@ Directory dir = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random, dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer( - MockTokenizer.WHITESPACE, true, usePayload)).setMergePolicy(newInOrderLogMergePolicy())); + MockTokenizer.WHITESPACE, true, usePayload)).setMergePolicy(newLogMergePolicy())); int numDocs = 131; int max = 1051; int term = random.nextInt(max); @@ -197,7 +197,7 @@ Directory dir = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random, dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer( - MockTokenizer.WHITESPACE, true, usePayload)).setMergePolicy(newInOrderLogMergePolicy())); + MockTokenizer.WHITESPACE, true, usePayload)).setMergePolicy(newLogMergePolicy())); int numDocs = 499; int max = 15678; int term = random.nextInt(max); Index: lucene/src/test/org/apache/lucene/index/TestIndexReader.java --- lucene/src/test/org/apache/lucene/index/TestIndexReader.java Thu Feb 10 05:03:34 2011 -0500 +++ lucene/src/test/org/apache/lucene/index/TestIndexReader.java Fri Feb 11 10:33:36 2011 -0500 @@ -371,7 +371,7 @@ Directory dir = newDirectory(); byte[] bin = new byte[]{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy())); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newLogMergePolicy())); for (int i = 0; i < 10; i++) { addDoc(writer, "document number " + (i + 1)); @@ -380,7 +380,7 @@ addDocumentWithTermVectorFields(writer); } writer.close(); - writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new 
MockAnalyzer()).setOpenMode(OpenMode.APPEND).setMergePolicy(newInOrderLogMergePolicy())); + writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.APPEND).setMergePolicy(newLogMergePolicy())); Document doc = new Document(); doc.add(new Field("bin1", bin)); doc.add(new Field("junk", "junk text", Field.Store.NO, Field.Index.ANALYZED)); @@ -417,7 +417,7 @@ // force optimize - writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.APPEND).setMergePolicy(newInOrderLogMergePolicy())); + writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.APPEND).setMergePolicy(newLogMergePolicy())); writer.optimize(); writer.close(); reader = IndexReader.open(dir, false); @@ -1163,7 +1163,7 @@ public void testMultiReaderDeletes() throws Exception { Directory dir = newDirectory(); - RandomIndexWriter w= new RandomIndexWriter(random, dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy())); + RandomIndexWriter w= new RandomIndexWriter(random, dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newLogMergePolicy())); Document doc = new Document(); doc.add(newField("f", "doctor", Field.Store.NO, Field.Index.NOT_ANALYZED)); w.addDocument(doc); Index: lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java --- lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java Thu Feb 10 05:03:34 2011 -0500 +++ lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java Fri Feb 11 10:33:36 2011 -0500 @@ -174,7 +174,7 @@ private void doTestReopenWithCommit (Random random, Directory dir, boolean withReopen) throws IOException { IndexWriter iwriter = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode( - OpenMode.CREATE).setMergeScheduler(new SerialMergeScheduler()).setMergePolicy(newInOrderLogMergePolicy())); + OpenMode.CREATE).setMergeScheduler(new SerialMergeScheduler()).setMergePolicy(newLogMergePolicy())); iwriter.commit(); IndexReader reader = IndexReader.open(dir, false); try { Index: lucene/src/test/org/apache/lucene/index/TestIndexWriterConfig.java --- lucene/src/test/org/apache/lucene/index/TestIndexWriterConfig.java Thu Feb 10 05:03:34 2011 -0500 +++ lucene/src/test/org/apache/lucene/index/TestIndexWriterConfig.java Fri Feb 11 10:33:36 2011 -0500 @@ -68,7 +68,7 @@ assertNull(conf.getMergedSegmentWarmer()); assertEquals(IndexWriterConfig.DEFAULT_MAX_THREAD_STATES, conf.getMaxThreadStates()); assertEquals(IndexWriterConfig.DEFAULT_READER_TERMS_INDEX_DIVISOR, conf.getReaderTermsIndexDivisor()); - assertEquals(LogByteSizeMergePolicy.class, conf.getMergePolicy().getClass()); + assertEquals(TieredMergePolicy.class, conf.getMergePolicy().getClass()); // Sanity check - validate that all getters are covered. 
Set getters = new HashSet(); @@ -246,7 +246,7 @@ assertEquals(IndexWriterConfig.DEFAULT_MAX_THREAD_STATES, conf.getMaxThreadStates()); // Test MergePolicy - assertEquals(LogByteSizeMergePolicy.class, conf.getMergePolicy().getClass()); + assertEquals(TieredMergePolicy.class, conf.getMergePolicy().getClass()); conf.setMergePolicy(new LogDocMergePolicy()); assertEquals(LogDocMergePolicy.class, conf.getMergePolicy().getClass()); conf.setMergePolicy(null); Index: lucene/src/test/org/apache/lucene/index/TestIndexWriterMergePolicy.java --- lucene/src/test/org/apache/lucene/index/TestIndexWriterMergePolicy.java Thu Feb 10 05:03:34 2011 -0500 +++ lucene/src/test/org/apache/lucene/index/TestIndexWriterMergePolicy.java Fri Feb 11 10:33:36 2011 -0500 @@ -104,7 +104,7 @@ dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()). setMaxBufferedDocs(10). - setMergePolicy(newInOrderLogMergePolicy()) + setMergePolicy(newLogMergePolicy()) ); for (int i = 0; i < 250; i++) { Index: lucene/src/test/org/apache/lucene/index/TestIndexWriterMerging.java --- lucene/src/test/org/apache/lucene/index/TestIndexWriterMerging.java Thu Feb 10 05:03:34 2011 -0500 +++ lucene/src/test/org/apache/lucene/index/TestIndexWriterMerging.java Fri Feb 11 10:33:36 2011 -0500 @@ -58,7 +58,7 @@ IndexWriter writer = new IndexWriter( merged, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()). - setMergePolicy(newInOrderLogMergePolicy(2)) + setMergePolicy(newLogMergePolicy(2)) ); writer.setInfoStream(VERBOSE ? System.out : null); writer.addIndexes(indexA, indexB); @@ -101,7 +101,7 @@ newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()). setOpenMode(OpenMode.CREATE). setMaxBufferedDocs(2). - setMergePolicy(newInOrderLogMergePolicy(2)) + setMergePolicy(newLogMergePolicy(2)) ); for (int i = start; i < (start + numDocs); i++) Index: lucene/src/test/org/apache/lucene/index/TestIndexWriterReader.java --- lucene/src/test/org/apache/lucene/index/TestIndexWriterReader.java Thu Feb 10 05:03:34 2011 -0500 +++ lucene/src/test/org/apache/lucene/index/TestIndexWriterReader.java Fri Feb 11 10:33:36 2011 -0500 @@ -46,7 +46,7 @@ import java.util.concurrent.atomic.AtomicInteger; public class TestIndexWriterReader extends LuceneTestCase { - static PrintStream infoStream; + static PrintStream infoStream = VERBOSE ? 
System.out : null; public static int count(Term t, IndexReader r) throws IOException { int count = 0; Index: lucene/src/test/org/apache/lucene/index/TestMaxTermFrequency.java --- lucene/src/test/org/apache/lucene/index/TestMaxTermFrequency.java Thu Feb 10 05:03:34 2011 -0500 +++ lucene/src/test/org/apache/lucene/index/TestMaxTermFrequency.java Fri Feb 11 10:33:36 2011 -0500 @@ -45,7 +45,7 @@ super.setUp(); dir = newDirectory(); IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, - new MockAnalyzer(MockTokenizer.SIMPLE, true)).setMergePolicy(newInOrderLogMergePolicy()); + new MockAnalyzer(MockTokenizer.SIMPLE, true)).setMergePolicy(newLogMergePolicy()); config.setSimilarityProvider(new TestSimilarity()); RandomIndexWriter writer = new RandomIndexWriter(random, dir, config); Document doc = new Document(); Index: lucene/src/test/org/apache/lucene/index/TestMultiLevelSkipList.java --- lucene/src/test/org/apache/lucene/index/TestMultiLevelSkipList.java Thu Feb 10 05:03:34 2011 -0500 +++ lucene/src/test/org/apache/lucene/index/TestMultiLevelSkipList.java Fri Feb 11 10:33:36 2011 -0500 @@ -71,7 +71,7 @@ public void testSimpleSkip() throws IOException { Directory dir = new CountingRAMDirectory(new RAMDirectory()); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new PayloadAnalyzer()).setCodecProvider(_TestUtil.alwaysCodec("Standard")).setMergePolicy(newInOrderLogMergePolicy())); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new PayloadAnalyzer()).setCodecProvider(_TestUtil.alwaysCodec("Standard")).setMergePolicy(newLogMergePolicy())); Term term = new Term("test", "a"); for (int i = 0; i < 5000; i++) { Document d1 = new Document(); Index: lucene/src/test/org/apache/lucene/index/TestNRTThreads.java --- lucene/src/test/org/apache/lucene/index/TestNRTThreads.java Thu Feb 10 05:03:34 2011 -0500 +++ lucene/src/test/org/apache/lucene/index/TestNRTThreads.java Fri Feb 11 10:33:36 2011 -0500 @@ -102,18 +102,7 @@ if (VERBOSE) { writer.setInfoStream(System.out); } - MergeScheduler ms = writer.getConfig().getMergeScheduler(); - if (ms instanceof ConcurrentMergeScheduler) { - // try to keep max file open count down - ((ConcurrentMergeScheduler) ms).setMaxThreadCount(1); - ((ConcurrentMergeScheduler) ms).setMaxMergeCount(1); - } - /* - LogMergePolicy lmp = (LogMergePolicy) writer.getConfig().getMergePolicy(); - if (lmp.getMergeFactor() > 5) { - lmp.setMergeFactor(5); - } - */ + _TestUtil.reduceOpenFiles(writer); final int NUM_INDEX_THREADS = 2; final int NUM_SEARCH_THREADS = 3; @@ -147,36 +136,36 @@ } if (random.nextBoolean()) { if (VERBOSE) { - System.out.println(Thread.currentThread().getName() + ": add doc id:" + doc.get("id")); + //System.out.println(Thread.currentThread().getName() + ": add doc id:" + doc.get("docid")); } writer.addDocument(doc); } else { // we use update but it never replaces a // prior doc if (VERBOSE) { - System.out.println(Thread.currentThread().getName() + ": update doc id:" + doc.get("id")); + //System.out.println(Thread.currentThread().getName() + ": update doc id:" + doc.get("docid")); } - writer.updateDocument(new Term("id", doc.get("id")), doc); + writer.updateDocument(new Term("docid", doc.get("docid")), doc); } if (random.nextInt(5) == 3) { if (VERBOSE) { - System.out.println(Thread.currentThread().getName() + ": buffer del id:" + doc.get("id")); + //System.out.println(Thread.currentThread().getName() + ": buffer del id:" + doc.get("docid")); } - toDeleteIDs.add(doc.get("id")); + 
toDeleteIDs.add(doc.get("docid")); } if (random.nextInt(50) == 17) { if (VERBOSE) { - System.out.println(Thread.currentThread().getName() + ": apply " + toDeleteIDs.size() + " deletes"); + //System.out.println(Thread.currentThread().getName() + ": apply " + toDeleteIDs.size() + " deletes"); } for(String id : toDeleteIDs) { if (VERBOSE) { - System.out.println(Thread.currentThread().getName() + ": del term=id:" + id); + //System.out.println(Thread.currentThread().getName() + ": del term=id:" + id); } - writer.deleteDocuments(new Term("id", id)); + writer.deleteDocuments(new Term("docid", id)); } final int count = delCount.addAndGet(toDeleteIDs.size()); if (VERBOSE) { - System.out.println(Thread.currentThread().getName() + ": tot " + count + " deletes"); + //System.out.println(Thread.currentThread().getName() + ": tot " + count + " deletes"); } delIDs.addAll(toDeleteIDs); toDeleteIDs.clear(); @@ -357,18 +346,18 @@ final IndexSearcher s = newSearcher(r2); boolean doFail = false; for(String id : delIDs) { - final TopDocs hits = s.search(new TermQuery(new Term("id", id)), 1); + final TopDocs hits = s.search(new TermQuery(new Term("docid", id)), 1); if (hits.totalHits != 0) { System.out.println("doc id=" + id + " is supposed to be deleted, but got docID=" + hits.scoreDocs[0].doc); doFail = true; } } - final int endID = Integer.parseInt(docs.nextDoc().get("id")); + final int endID = Integer.parseInt(docs.nextDoc().get("docid")); for(int id=0;id> docs = new ArrayList>(); Document d = new Document(); Field f = newField("f", "", Field.Store.NO, Field.Index.ANALYZED); Index: lucene/src/test/org/apache/lucene/search/TestSort.java --- lucene/src/test/org/apache/lucene/search/TestSort.java Thu Feb 10 05:03:34 2011 -0500 +++ lucene/src/test/org/apache/lucene/search/TestSort.java Fri Feb 11 10:33:36 2011 -0500 @@ -120,7 +120,7 @@ throws IOException { Directory indexStore = newDirectory(); dirs.add(indexStore); - RandomIndexWriter writer = new RandomIndexWriter(random, indexStore, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy())); + RandomIndexWriter writer = new RandomIndexWriter(random, indexStore, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newLogMergePolicy())); for (int i=0; i