Index: lucene/CHANGES.txt
--- lucene/CHANGES.txt Thu Feb 10 05:03:34 2011 -0500
+++ lucene/CHANGES.txt Fri Feb 11 10:33:36 2011 -0500
@@ -154,10 +154,10 @@
* LUCENE-2720: IndexWriter throws IndexFormatTooOldException on open, rather
than later when e.g. a merge starts. (Shai Erera, Mike McCandless, Uwe Schindler)
-* LUCENE-1076: The default merge policy is now able to merge
- non-contiguous segments, which means docIDs no longer necessarily
- say "in order". If this is a problem then you can use either of the
- LogMergePolicy impls, and call setRequireContiguousMerge(true).
+* LUCENE-1076, LUCENE-XXXX: The default merge policy
+ (TieredMergePolicy) is now able to merge non-contiguous segments,
+ which means docIDs no longer necessarily stay "in order". If this is
+ a problem then you can use either of the LogMergePolicy impls.
(Mike McCandless)
API Changes
@@ -317,6 +317,11 @@
* LUCENE-2862: Added TermsEnum.totalTermFreq() and
Terms.getSumTotalTermFreq(). (Mike McCandless, Robert Muir)
+* LUCENE-XXXX: Added a new merge policy, TieredMergePolicy, as the
+ default merge policy. This policy addresses certain limitations
+ of LogByteSize/DocMergePolicy, but is allowed to merge segments out
+ of order. (Mike McCandless)
+
Optimizations
* LUCENE-2588: Don't store unnecessary suffixes when writing the terms
Index: lucene/MIGRATE.txt
--- lucene/MIGRATE.txt Thu Feb 10 05:03:34 2011 -0500
+++ lucene/MIGRATE.txt Fri Feb 11 10:33:36 2011 -0500
@@ -337,3 +337,9 @@
Similarity can now be configured on a per-field basis.
Similarity retains only the field-specific relevance methods such as tf() and idf().
Methods that apply to the entire query such as coord() and queryNorm() exist in SimilarityProvider.
+
+* LUCENE-XXXX: TieredMergePolicy is now the default merge policy.
+ It's able to merge non-contiguous segments; this may cause problems
+ for applications that rely on Lucene's internal document ID
+ assignment. If so, you should use LogByteSize/DocMergePolicy during
+ indexing.
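+
+  A minimal sketch of keeping the old behavior (the analyzer choice
+  and index path below are illustrative only):
+
+    import java.io.File;
+    import org.apache.lucene.analysis.WhitespaceAnalyzer;
+    import org.apache.lucene.index.IndexWriter;
+    import org.apache.lucene.index.IndexWriterConfig;
+    import org.apache.lucene.index.LogByteSizeMergePolicy;
+    import org.apache.lucene.store.Directory;
+    import org.apache.lucene.store.FSDirectory;
+    import org.apache.lucene.util.Version;
+
+    // Restore the pre-TieredMergePolicy default so merges stay
+    // contiguous and docIDs keep their assignment order.
+    Directory dir = FSDirectory.open(new File("/path/to/index"));
+    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_CURRENT,
+        new WhitespaceAnalyzer(Version.LUCENE_CURRENT))
+      .setMergePolicy(new LogByteSizeMergePolicy());
+    IndexWriter writer = new IndexWriter(dir, conf);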
Index: lucene/contrib/ant/src/java/org/apache/lucene/ant/IndexTask.java
--- lucene/contrib/ant/src/java/org/apache/lucene/ant/IndexTask.java Thu Feb 10 05:03:34 2011 -0500
+++ lucene/contrib/ant/src/java/org/apache/lucene/ant/IndexTask.java Fri Feb 11 10:33:36 2011 -0500
@@ -39,7 +39,7 @@
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
-import org.apache.lucene.index.LogMergePolicy;
+import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.IndexSearcher;
@@ -285,9 +285,9 @@
IndexWriterConfig conf = new IndexWriterConfig(
Version.LUCENE_CURRENT, analyzer).setOpenMode(
create ? OpenMode.CREATE : OpenMode.APPEND);
- LogMergePolicy lmp = (LogMergePolicy) conf.getMergePolicy();
- lmp.setUseCompoundFile(useCompoundIndex);
- lmp.setMergeFactor(mergeFactor);
+ TieredMergePolicy tmp = (TieredMergePolicy) conf.getMergePolicy();
+ tmp.setUseCompoundFile(useCompoundIndex);
+ tmp.setMaxMergeAtOnce(mergeFactor);
IndexWriter writer = new IndexWriter(dir, conf);
int totalFiles = 0;
int totalIndexed = 0;
Index: lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java
--- lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java Thu Feb 10 05:03:34 2011 -0500
+++ lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java Fri Feb 11 10:33:36 2011 -0500
@@ -65,7 +65,7 @@
// create dir data
IndexWriter indexWriter = new IndexWriter(dir, newIndexWriterConfig(
- TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy()));
+ TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newLogMergePolicy()));
for (int i = 0; i < 20; i++) {
Document document = new Document();
@@ -91,7 +91,7 @@
// create dir data
IndexWriter indexWriter = new IndexWriter(dir, newIndexWriterConfig(
- TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy()));
+ TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newLogMergePolicy()));
indexWriter.setInfoStream(VERBOSE ? System.out : null);
if (VERBOSE) {
System.out.println("TEST: make test index");
Index: lucene/contrib/misc/src/test/org/apache/lucene/index/TestFieldNormModifier.java
--- lucene/contrib/misc/src/test/org/apache/lucene/index/TestFieldNormModifier.java Thu Feb 10 05:03:34 2011 -0500
+++ lucene/contrib/misc/src/test/org/apache/lucene/index/TestFieldNormModifier.java Fri Feb 11 10:33:36 2011 -0500
@@ -54,7 +54,7 @@
super.setUp();
store = newDirectory();
IndexWriter writer = new IndexWriter(store, newIndexWriterConfig(
- TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy()));
+ TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newLogMergePolicy()));
for (int i = 0; i < NUM_DOCS; i++) {
Document d = new Document();
Index: lucene/contrib/misc/src/test/org/apache/lucene/index/TestMultiPassIndexSplitter.java
--- lucene/contrib/misc/src/test/org/apache/lucene/index/TestMultiPassIndexSplitter.java Thu Feb 10 05:03:34 2011 -0500
+++ lucene/contrib/misc/src/test/org/apache/lucene/index/TestMultiPassIndexSplitter.java Fri Feb 11 10:33:36 2011 -0500
@@ -32,7 +32,7 @@
public void setUp() throws Exception {
super.setUp();
dir = newDirectory();
- IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy()));
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newLogMergePolicy()));
Document doc;
for (int i = 0; i < NUM_DOCS; i++) {
doc = new Document();
Index: lucene/contrib/misc/src/test/org/apache/lucene/index/codecs/appending/TestAppendingCodec.java
--- lucene/contrib/misc/src/test/org/apache/lucene/index/codecs/appending/TestAppendingCodec.java Thu Feb 10 05:03:34 2011 -0500
+++ lucene/contrib/misc/src/test/org/apache/lucene/index/codecs/appending/TestAppendingCodec.java Fri Feb 11 10:33:36 2011 -0500
@@ -30,7 +30,7 @@
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
-import org.apache.lucene.index.LogMergePolicy;
+import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
@@ -137,7 +137,7 @@
IndexWriterConfig cfg = new IndexWriterConfig(Version.LUCENE_40, new MockAnalyzer());
cfg.setCodecProvider(new AppendingCodecProvider());
- ((LogMergePolicy)cfg.getMergePolicy()).setUseCompoundFile(false);
+ ((TieredMergePolicy)cfg.getMergePolicy()).setUseCompoundFile(false);
IndexWriter writer = new IndexWriter(dir, cfg);
Document doc = new Document();
doc.add(newField("f", text, Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
Index: lucene/contrib/misc/src/test/org/apache/lucene/misc/TestLengthNormModifier.java
--- lucene/contrib/misc/src/test/org/apache/lucene/misc/TestLengthNormModifier.java Thu Feb 10 05:03:34 2011 -0500
+++ lucene/contrib/misc/src/test/org/apache/lucene/misc/TestLengthNormModifier.java Fri Feb 11 10:33:36 2011 -0500
@@ -59,7 +59,7 @@
super.setUp();
store = newDirectory();
IndexWriter writer = new IndexWriter(store, newIndexWriterConfig(
- TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy()));
+ TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newLogMergePolicy()));
for (int i = 0; i < NUM_DOCS; i++) {
Document d = new Document();
Index: lucene/contrib/queries/src/test/org/apache/lucene/search/DuplicateFilterTest.java
--- lucene/contrib/queries/src/test/org/apache/lucene/search/DuplicateFilterTest.java Thu Feb 10 05:03:34 2011 -0500
+++ lucene/contrib/queries/src/test/org/apache/lucene/search/DuplicateFilterTest.java Fri Feb 11 10:33:36 2011 -0500
@@ -43,7 +43,7 @@
public void setUp() throws Exception {
super.setUp();
directory = newDirectory();
- RandomIndexWriter writer = new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy()));
+ RandomIndexWriter writer = new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newLogMergePolicy()));
//Add series of docs with filterable fields : url, text and dates flags
addDoc(writer, "http://lucene.apache.org", "lucene 1.4.3 available", "20040101");
Index: lucene/contrib/queries/src/test/org/apache/lucene/search/FuzzyLikeThisQueryTest.java
--- lucene/contrib/queries/src/test/org/apache/lucene/search/FuzzyLikeThisQueryTest.java Thu Feb 10 05:03:34 2011 -0500
+++ lucene/contrib/queries/src/test/org/apache/lucene/search/FuzzyLikeThisQueryTest.java Fri Feb 11 10:33:36 2011 -0500
@@ -40,7 +40,7 @@
public void setUp() throws Exception {
super.setUp();
directory = newDirectory();
- RandomIndexWriter writer = new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy()));
+ RandomIndexWriter writer = new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newLogMergePolicy()));
//Add series of docs with misspelt names
addDoc(writer, "jonathon smythe","1");
Index: lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java
--- lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java Thu Feb 10 05:03:34 2011 -0500
+++ lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java Fri Feb 11 10:33:36 2011 -0500
@@ -29,7 +29,7 @@
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
-import org.apache.lucene.index.LogMergePolicy;
+import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Terms;
@@ -45,7 +45,6 @@
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.ReaderUtil;
import org.apache.lucene.util.Version;
-import org.apache.lucene.util.VirtualMethod;
/**
*
@@ -508,7 +507,7 @@
ensureOpen();
final Directory dir = this.spellIndex;
final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Version.LUCENE_CURRENT, new WhitespaceAnalyzer(Version.LUCENE_CURRENT)).setRAMBufferSizeMB(ramMB));
- ((LogMergePolicy) writer.getConfig().getMergePolicy()).setMergeFactor(mergeFactor);
+ ((TieredMergePolicy) writer.getConfig().getMergePolicy()).setMaxMergeAtOnce(mergeFactor);
IndexSearcher indexSearcher = obtainSearcher();
final List<TermsEnum> termsEnums = new ArrayList<TermsEnum>();
Index: lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/Syns2Index.java
--- lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/Syns2Index.java Thu Feb 10 05:03:34 2011 -0500
+++ lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/Syns2Index.java Fri Feb 11 10:33:36 2011 -0500
@@ -36,7 +36,7 @@
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
-import org.apache.lucene.index.LogMergePolicy;
+import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
@@ -250,7 +250,7 @@
// override the specific index if it already exists
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
Version.LUCENE_CURRENT, ana).setOpenMode(OpenMode.CREATE));
- ((LogMergePolicy) writer.getConfig().getMergePolicy()).setUseCompoundFile(true); // why?
+ ((TieredMergePolicy) writer.getConfig().getMergePolicy()).setUseCompoundFile(true); // why?
Iterator<String> i1 = word2Nums.keySet().iterator();
while (i1.hasNext()) // for each word
{
Index: lucene/contrib/wordnet/src/test/org/apache/lucene/wordnet/TestWordnet.java
--- lucene/contrib/wordnet/src/test/org/apache/lucene/wordnet/TestWordnet.java Thu Feb 10 05:03:34 2011 -0500
+++ lucene/contrib/wordnet/src/test/org/apache/lucene/wordnet/TestWordnet.java Fri Feb 11 10:33:36 2011 -0500
@@ -29,6 +29,7 @@
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util._TestUtil;
public class TestWordnet extends LuceneTestCase {
private IndexSearcher searcher;
@@ -42,6 +43,7 @@
// create a temporary synonym index
File testFile = getDataFile("testSynonyms.txt");
String commandLineArgs[] = { testFile.getAbsolutePath(), storePathName };
+ _TestUtil.rmDir(new File(storePathName));
try {
Syns2Index.main(commandLineArgs);
@@ -71,8 +73,12 @@
@Override
public void tearDown() throws Exception {
- searcher.close();
- dir.close();
+ if (searcher != null) {
+ searcher.close();
+ }
+ if (dir != null) {
+ dir.close();
+ }
rmDir(storePathName); // delete our temporary synonym index
super.tearDown();
}
Index: lucene/src/java/org/apache/lucene/index/IndexWriter.java
--- lucene/src/java/org/apache/lucene/index/IndexWriter.java Thu Feb 10 05:03:34 2011 -0500
+++ lucene/src/java/org/apache/lucene/index/IndexWriter.java Fri Feb 11 10:33:36 2011 -0500
@@ -920,7 +920,7 @@
* message when maxFieldLength is reached will be printed
* to this.
*/
- public void setInfoStream(PrintStream infoStream) {
+ public void setInfoStream(PrintStream infoStream) throws IOException {
ensureOpen();
this.infoStream = infoStream;
docWriter.setInfoStream(infoStream);
@@ -930,7 +930,7 @@
messageState();
}
- private void messageState() {
+ private void messageState() throws IOException {
message("\ndir=" + directory + "\n" +
"index=" + segString() + "\n" +
"version=" + Constants.LUCENE_VERSION + "\n" +
@@ -1684,6 +1684,8 @@
throws CorruptIndexException, IOException {
ensureOpen();
+ flush(true, true);
+
if (infoStream != null)
message("expungeDeletes: index now " + segString());
@@ -1756,6 +1758,10 @@
* documents, so you must do so yourself if necessary.
* See also {@link #expungeDeletes(boolean)}
*
+ * NOTE: this method first flushes a new
+ * segment (if there are indexed documents), and applies
+ * all buffered deletes.
+ *
*
* NOTE: if this method hits an OutOfMemoryError
* you should immediately close the writer. See above for details.
@@ -2598,7 +2604,7 @@
return docWriter.getNumDocs();
}
- private void ensureValidMerge(MergePolicy.OneMerge merge) {
+ private void ensureValidMerge(MergePolicy.OneMerge merge) throws IOException {
for(SegmentInfo info : merge.segments) {
if (segmentInfos.indexOf(info) == -1) {
throw new MergePolicy.MergeException("MergePolicy selected a segment (" + info.name + ") that is not in the current index " + segString(), directory);
@@ -2870,7 +2876,7 @@
* are now participating in a merge, and true is
* returned. Else (the merge conflicts) false is
* returned. */
- final synchronized boolean registerMerge(MergePolicy.OneMerge merge) throws MergePolicy.MergeAbortedException {
+ final synchronized boolean registerMerge(MergePolicy.OneMerge merge) throws MergePolicy.MergeAbortedException, IOException {
if (merge.registerDone)
return true;
@@ -2880,10 +2886,8 @@
throw new MergePolicy.MergeAbortedException("merge is aborted: " + merge.segString(directory));
}
- final int count = merge.segments.size();
boolean isExternal = false;
- for(int i=0;i<count;i++) {
[...]
return segmentInfos.size() > 0 ? segmentInfos.info(segmentInfos.size()-1) : null;
}
- public synchronized String segString() {
+ /** @lucene.internal */
+ public synchronized String segString() throws IOException {
return segString(segmentInfos);
}
- private synchronized String segString(SegmentInfos infos) {
+ /** @lucene.internal */
+ public synchronized String segString(SegmentInfos infos) throws IOException {
StringBuilder buffer = new StringBuilder();
final int count = infos.size();
for(int i = 0; i < count; i++) {
if (i > 0) {
buffer.append(' ');
}
- final SegmentInfo info = infos.info(i);
- buffer.append(info.toString(directory, 0));
- if (info.dir != directory)
- buffer.append("**");
+ buffer.append(segString(infos.info(i)));
+ }
+
+ return buffer.toString();
+ }
+
+ public synchronized String segString(SegmentInfo info) throws IOException {
+ StringBuilder buffer = new StringBuilder();
+ SegmentReader reader = readerPool.getIfExists(info);
+ try {
+ if (reader != null) {
+ buffer.append(reader.toString());
+ } else {
+ buffer.append(info.toString(directory, 0));
+ if (info.dir != directory) {
+ buffer.append("**");
+ }
+ }
+ } finally {
+ if (reader != null) {
+ readerPool.release(reader);
+ }
}
return buffer.toString();
}
Index: lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java
--- lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java Thu Feb 10 05:03:34 2011 -0500
+++ lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java Fri Feb 11 10:33:36 2011 -0500
@@ -153,7 +153,7 @@
indexingChain = DocumentsWriter.defaultIndexingChain;
mergedSegmentWarmer = null;
codecProvider = CodecProvider.getDefault();
- mergePolicy = new LogByteSizeMergePolicy();
+ mergePolicy = new TieredMergePolicy();
maxThreadStates = DEFAULT_MAX_THREAD_STATES;
readerPooling = DEFAULT_READER_POOLING;
readerTermsIndexDivisor = DEFAULT_READER_TERMS_INDEX_DIVISOR;
Index: lucene/src/java/org/apache/lucene/index/LogMergePolicy.java
--- lucene/src/java/org/apache/lucene/index/LogMergePolicy.java Thu Feb 10 05:03:34 2011 -0500
+++ lucene/src/java/org/apache/lucene/index/LogMergePolicy.java Fri Feb 11 10:33:36 2011 -0500
@@ -20,7 +20,6 @@
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
-import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Set;
@@ -72,7 +71,6 @@
// out there wrote his own LMP ...
protected long maxMergeSizeForOptimize = Long.MAX_VALUE;
protected int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS;
- protected boolean requireContiguousMerge = false;
protected double noCFSRatio = DEFAULT_NO_CFS_RATIO;
@@ -111,21 +109,6 @@
writer.get().message("LMP: " + message);
}
- /** If true, merges must be in-order slice of the
- * segments. If false, then the merge policy is free to
- * pick any segments. The default is false, which is
- * in general more efficient than true since it gives the
- * merge policy more freedom to pick closely sized
- * segments. */
- public void setRequireContiguousMerge(boolean v) {
- requireContiguousMerge = v;
- }
-
- /** See {@link #setRequireContiguousMerge}. */
- public boolean getRequireContiguousMerge() {
- return requireContiguousMerge;
- }
-
/** Returns the number of segments that are merged at
* once and also controls the total number of segments
* allowed to accumulate in the index.
*/
@@ -378,8 +361,6 @@
return null;
}
- // TODO: handle non-contiguous merge case differently?
-
// Find the newest (rightmost) segment that needs to
// be optimized (other segments may have been flushed
// since optimize started):
@@ -499,14 +480,6 @@
}
}
- private static class SortByIndex implements Comparator<SegmentInfoAndLevel> {
- public int compare(SegmentInfoAndLevel o1, SegmentInfoAndLevel o2) {
- return o1.index - o2.index;
- }
- }
-
- private static final SortByIndex sortByIndex = new SortByIndex();
-
/** Checks if any merges are now necessary and returns a
* {@link MergePolicy.MergeSpecification} if so. A merge
* is necessary when there are more than {@link
@@ -532,31 +505,24 @@
final SegmentInfo info = infos.info(i);
long size = size(info);
- // When we require contiguous merge, we still add the
- // segment to levels to avoid merging "across" a set
- // of segment being merged:
- if (!requireContiguousMerge && mergingSegments.contains(info)) {
- if (verbose()) {
- message("seg " + info.name + " already being merged; skip");
- }
- continue;
- }
-
// Floor tiny segments
if (size < 1) {
size = 1;
}
+
final SegmentInfoAndLevel infoLevel = new SegmentInfoAndLevel(info, (float) Math.log(size)/norm, i);
levels.add(infoLevel);
+
if (verbose()) {
- message("seg " + info.name + " level=" + infoLevel.level + " size=" + size);
+ final long segBytes = sizeBytes(info);
+ String extra = mergingSegments.contains(info) ? " [merging]" : "";
+ if (size >= maxMergeSize) {
+ extra += " [skip: too large]";
+ }
+ message("seg=" + writer.get().segString(info) + " level=" + infoLevel.level + " size=" + String.format("%.3f MB", segBytes/1024/1024.) + extra);
}
}
- if (!requireContiguousMerge) {
- Collections.sort(levels);
- }
-
final float levelFloor;
if (minMergeSize <= 0)
levelFloor = (float) 0.0;
@@ -614,23 +580,29 @@
int end = start + mergeFactor;
while(end <= 1+upto) {
boolean anyTooLarge = false;
+ boolean anyMerging = false;
for(int i=start;i<end;i++) {
final SegmentInfo info = levels.get(i).info;
anyTooLarge |= (size(info) >= maxMergeSize || sizeDocs(info) >= maxMergeDocs);
+ if (mergingSegments.contains(info)) {
+ anyMerging = true;
+ break;
+ }
}
- if (!anyTooLarge) {
+ if (anyMerging) {
+ // skip
+ } else if (!anyTooLarge) {
if (spec == null)
spec = new MergeSpecification();
- if (verbose()) {
- message(" " + start + " to " + end + ": add this merge");
- }
- Collections.sort(levels.subList(start, end), sortByIndex);
final SegmentInfos mergeInfos = new SegmentInfos();
+ for(int i=start;i<end;i++) {
[...]
Index: lucene/src/java/org/apache/lucene/index/TieredMergePolicy.java
[...]
+ * <p>For normal merging, this policy first computes a
+ * "budget" of how many segments are allowed by be in the
+ * index. If the index is over-budget, then the policy
+ * sorts segments by decresing size (pro-rating by percent
+ * deletes), and then finds the least-cost merge. Merge
+ * cost is measured by a combination of the "skew" of the
+ * merge (size of largest seg divided by smallest seg),
+ * total merge size and pct deletes reclaimed,
+ * so that merges with lower skew, smaller size
+ * and those reclaiming more deletes, are
+ * favored.
+ *
+ * If a merge will produce a segment that's larger than
+ * {@link #setMaxMergedSegmentMB}, then the policy will
+ * merge fewer segments (down to 1 at once, if that one has
+ * deletions) to keep the segment size under budget.
+ *
+ *
+ * NOTE: this policy freely merges non-adjacent
+ * segments; if this is a problem, use {@link
+ * LogMergePolicy}.
+ *
+ *
+ * NOTE: This policy always merges by byte size
+ * of the segments, always pro-rates by percent deletes,
+ * and does not apply any maximum segment size during
+ * optimize (unlike {@link LogByteSizeMergePolicy}).
+ *
+ * @lucene.experimental
+ */
+
+// TODO
+// - we could try to take into account whether a large
+// merge is already running (under CMS) and then bias
+// ourselves towards picking smaller merges if so (or,
+// maybe CMS should do so)
+
+public class TieredMergePolicy extends MergePolicy {
+
+ private int maxMergeAtOnce = 10;
+ private long maxMergedSegmentBytes = 5*1024*1024*1024L;
+ private int maxMergeAtOnceExplicit = 30;
+
+ private long floorSegmentBytes = 2*1024*1024L;
+ private double segsPerTier = 10.0;
+ private double expungeDeletesPctAllowed = 10.0;
+ private boolean useCompoundFile = true;
+ private double noCFSRatio = 0.1;
+
+ /** Maximum number of segments to be merged at a time
+ * during "normal" merging. For explicit merging (eg,
+ * optimize or expungeDeletes was called), see {@link
+ * #setMaxMergeAtOnceExplicit}. Default is 10. */
+ public TieredMergePolicy setMaxMergeAtOnce(int v) {
+ if (v < 2) {
+ throw new IllegalArgumentException("maxMergeAtOnce must be > 1 (got " + v + ")");
+ }
+ maxMergeAtOnce = v;
+ return this;
+ }
+
+ /** @see #setMaxMergeAtOnce */
+ public int getMaxMergeAtOnce() {
+ return maxMergeAtOnce;
+ }
+
+ // TODO: should addIndexes do explicit merging, too? And,
+ // if user calls IW.maybeMerge "explicitly"
+
+ /** Maximum number of segments to be merged at a time,
+ * during optimize or expungeDeletes. Default is 30. */
+ public TieredMergePolicy setMaxMergeAtOnceExplicit(int v) {
+ if (v < 2) {
+ throw new IllegalArgumentException("maxMergeAtOnceExplicit must be > 1 (got " + v + ")");
+ }
+ maxMergeAtOnceExplicit = v;
+ return this;
+ }
+
+ /** @see #setMaxMergeAtOnceExplicit */
+ public int getMaxMergeAtOnceExplicit() {
+ return maxMergeAtOnceExplicit;
+ }
+
+ /** Maximum sized segment to produce during
+ * normal merging. This setting is approximate: the
+ * estimate of the merged segment size is made by summing
+ * sizes of to-be-merged segments (compensating for
+ * percent deleted docs). Default is 5 GB. */
+ public TieredMergePolicy setMaxMergedSegmentMB(double v) {
+ maxMergedSegmentBytes = (long) (v*1024*1024);
+ return this;
+ }
+
+ /** @see #setMaxMergedSegmentMB */
+ public double getMaxMergedSegmentMB() {
+ return maxMergedSegmentBytes/1024/1024.;
+ }
+
+ /** Segments smaller than this are "rounded up" to this
+ * size, ie treated as equal (floor) size for merge
+ * selection. This is to prevent frequent flushing of
+ * tiny segments from allowing a long tail in the index.
+ * Default is 2 MB. */
+ public TieredMergePolicy setFloorSegmentMB(double v) {
+ if (v <= 0.0) {
+ throw new IllegalArgumentException("floorSegmentMB must be >= 0.0 (got " + v + ")");
+ }
+ floorSegmentBytes = (long) (v*1024*1024);
+ return this;
+ }
+
+ /** @see #setFloorSegmentMB */
+ public double getFloorSegmentMB() {
+ return floorSegmentBytes/1024/1024.;
+ }
+
+ /** When expungeDeletes is called, we only merge away a
+ * segment if its delete percentage is over this
+ * threshold. Default is 10%. */
+ public TieredMergePolicy setExpungeDeletesPctAllowed(double v) {
+ if (v < 0.0 || v > 100.0) {
+ throw new IllegalArgumentException("expungeDeletesPctAllowed must be between 0.0 and 100.0 inclusive (got " + v + ")");
+ }
+ expungeDeletesPctAllowed = v;
+ return this;
+ }
+
+ /** @see #setExpungeDeletesPctAllowed */
+ public double getExpungeDeletesPctAllowed() {
+ return expungeDeletesPctAllowed;
+ }
+
+ /** Sets the allowed number of segments per tier. Smaller
+ * values mean more merging but fewer segments. This value
+ * should be >= the {@link #setMaxMergeAtOnce} setting,
+ * otherwise you'll hit excessive merging. Default is 10.0. */
+ public TieredMergePolicy setSegmentsPerTier(double v) {
+ if (v < 2.0) {
+ throw new IllegalArgumentException("segmentsPerTier must be >= 2.0 (got " + v + ")");
+ }
+ segsPerTier = v;
+ return this;
+ }
+
+ /** @see #setSegmentsPerTier */
+ public double getSegmentsPerTier() {
+ return segsPerTier;
+ }
+
+ /** Sets whether compound file format should be used for
+ * newly flushed and newly merged segments. Default
+ * true. */
+ public TieredMergePolicy setUseCompoundFile(boolean useCompoundFile) {
+ this.useCompoundFile = useCompoundFile;
+ return this;
+ }
+
+ /** @see #setUseCompoundFile */
+ public boolean getUseCompoundFile() {
+ return useCompoundFile;
+ }
+
+ /** If a merged segment will be more than this percentage
+ * of the total size of the index, leave the segment as
+ * non-compound file even if compound file is enabled.
+ * Set to 1.0 to always use CFS regardless of merge
+ * size. Default is 0.1. */
+ public TieredMergePolicy setNoCFSRatio(double noCFSRatio) {
+ if (noCFSRatio < 0.0 || noCFSRatio > 1.0) {
+ throw new IllegalArgumentException("noCFSRatio must be 0.0 to 1.0 inclusive; got " + noCFSRatio);
+ }
+ this.noCFSRatio = noCFSRatio;
+ return this;
+ }
+
+ /** @see #setNoCFSRatio */
+ public double getNoCFSRatio() {
+ return noCFSRatio;
+ }
+
+ private class SegmentByteSizeDescending implements Comparator<SegmentInfo> {
+ public int compare(SegmentInfo o1, SegmentInfo o2) {
+ try {
+ final long sz1 = size(o1);
+ final long sz2 = size(o2);
+ if (sz1 > sz2) {
+ return -1;
+ } else if (sz2 > sz1) {
+ return 1;
+ } else {
+ return o1.name.compareTo(o2.name);
+ }
+ } catch (IOException ioe) {
+ throw new RuntimeException(ioe);
+ }
+ }
+ }
+
+ private final Comparator<SegmentInfo> segmentByteSizeDescending = new SegmentByteSizeDescending();
+
+ protected static abstract class MergeScore {
+ abstract double getScore();
+ abstract String getExplanation();
+ }
+
+ @Override
+ public MergeSpecification findMerges(SegmentInfos infos) throws IOException {
+ if (verbose()) {
+ message("findMerges: " + infos.size() + " segments");
+ }
+ if (infos.size() == 0) {
+ return null;
+ }
+ final Collection<SegmentInfo> merging = writer.get().getMergingSegments();
+ final Collection<SegmentInfo> toBeMerged = new HashSet<SegmentInfo>();
+
+ final SegmentInfos infosSorted = new SegmentInfos();
+ infosSorted.addAll(infos);
+
+ Collections.sort(infosSorted, segmentByteSizeDescending);
+
+ // Compute total index bytes & print details about the index
+ long totIndexBytes = 0;
+ long minSegmentBytes = Long.MAX_VALUE;
+ for(SegmentInfo info : infosSorted) {
+ final long segBytes = size(info);
+ if (verbose()) {
+ String extra = merging.contains(info) ? " [merging]" : "";
+ if (segBytes >= maxMergedSegmentBytes/2.0) {
+ extra += " [skip: too large]";
+ } else if (segBytes < floorSegmentBytes) {
+ extra += " [floored]";
+ }
+ message(" seg=" + writer.get().segString(info) + " size=" + String.format("%.3f", segBytes/1024/1024.) + " MB" + extra);
+ }
+
+ minSegmentBytes = Math.min(segBytes, minSegmentBytes);
+ // Accum total byte size
+ totIndexBytes += segBytes;
+ }
+
+ // If we have too-large segments, grace them out
+ // of the maxSegmentCount:
+ int tooBigCount = 0;
+ while (tooBigCount < infosSorted.size() && size(infosSorted.info(tooBigCount)) >= maxMergedSegmentBytes/2.0) {
+ totIndexBytes -= size(infosSorted.get(tooBigCount));
+ tooBigCount++;
+ }
+
+ minSegmentBytes = floorSize(minSegmentBytes);
+
+ // Compute max allowed segs in the index
+ long levelSize = minSegmentBytes;
+ long bytesLeft = totIndexBytes;
+ double allowedSegCount = 0;
+ while(true) {
+ final double segCountLevel = bytesLeft / (double) levelSize;
+ if (segCountLevel < segsPerTier) {
+ allowedSegCount += Math.ceil(segCountLevel);
+ break;
+ }
+ allowedSegCount += segsPerTier;
+ bytesLeft -= segsPerTier * levelSize;
+ levelSize *= maxMergeAtOnce;
+ }
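+ // e.g. (illustrative numbers): with the defaults (2 MB floor,
+ // segsPerTier=10, maxMergeAtOnce=10) and 5000 MB of 2 MB-floored
+ // segments, the loop above allows 10 segs at the 2 MB tier, 10 at
+ // 20 MB, 10 at 200 MB, then ceil(2780/2000)=2 at 2 GB: budget=32.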
+ int allowedSegCountInt = (int) allowedSegCount;
+
+ MergeSpecification spec = null;
+
+ // Cycle to possibly select more than one merge:
+ while(true) {
+
+ // Gather eligible segments for merging, ie segments
+ // not already being merged and not already picked (by
+ // prior iteration of this loop) for merging:
+ final SegmentInfos eligible = new SegmentInfos();
+ for(int idx = tooBigCount; idx<infosSorted.size(); idx++) {
[...]
+ @Override
+ public MergeSpecification findMergesForOptimize(SegmentInfos infos, int maxSegmentCount, Set<SegmentInfo> segmentsToOptimize) throws IOException {
+ if (verbose()) {
+ message("findMergesForOptimize maxSegmentCount=" + maxSegmentCount + " infos=" + writer.get().segString(infos) + " segmentsToOptimize=" + segmentsToOptimize);
+ }
+ SegmentInfos eligible = new SegmentInfos();
+ boolean optimizeMergeRunning = false;
+ final Collection<SegmentInfo> merging = writer.get().getMergingSegments();
+ for(SegmentInfo info : infos) {
+ if (segmentsToOptimize.contains(info)) {
+ if (!merging.contains(info)) {
+ eligible.add(info);
+ } else {
+ optimizeMergeRunning = true;
+ }
+ }
+ }
+
+ if (eligible.size() == 0) {
+ return null;
+ }
+
+ if ((maxSegmentCount > 1 && eligible.size() <= maxSegmentCount) ||
+ (maxSegmentCount == 1 && eligible.size() == 1 && isOptimized(eligible.get(0)))) {
+ if (verbose()) {
+ message("already optimized");
+ }
+ return null;
+ }
+
+ Collections.sort(eligible, segmentByteSizeDescending);
+
+ if (verbose()) {
+ message("eligible=" + eligible);
+ message("optimizeMergeRunning=" + optimizeMergeRunning);
+ }
+
+ int end = eligible.size();
+
+ MergeSpecification spec = null;
+
+ // Do full merges, first, backwards:
+ while(end >= maxMergeAtOnceExplicit + maxSegmentCount - 1) {
+ if (spec == null) {
+ spec = new MergeSpecification();
+ }
+ final OneMerge merge = new OneMerge(eligible.range(end-maxMergeAtOnceExplicit, end));
+ if (verbose()) {
+ message("add merge=" + writer.get().segString(merge.segments));
+ }
+ spec.add(merge);
+ end -= maxMergeAtOnceExplicit;
+ }
+
+ if (spec == null && !optimizeMergeRunning) {
+ // Do final merge
+ final int numToMerge = end - maxSegmentCount + 1;
+ final OneMerge merge = new OneMerge(eligible.range(end-numToMerge, end));
+ if (verbose()) {
+ message("add final merge=" + merge.segString(writer.get().getDirectory()));
+ }
+ spec = new MergeSpecification();
+ spec.add(merge);
+ }
+
+ return spec;
+ }
+
+ @Override
+ public MergeSpecification findMergesToExpungeDeletes(SegmentInfos infos)
+ throws CorruptIndexException, IOException {
+ if (verbose()) {
+ message("findMergesToExpungeDeletes infos=" + writer.get().segString(infos) + " expungeDeletesPctAllowed=" + expungeDeletesPctAllowed);
+ }
+ final SegmentInfos eligible = new SegmentInfos();
+ final Collection<SegmentInfo> merging = writer.get().getMergingSegments();
+ for(SegmentInfo info : infos) {
+ double pctDeletes = 100.*((double) writer.get().numDeletedDocs(info))/info.docCount;
+ if (pctDeletes > expungeDeletesPctAllowed && !merging.contains(info)) {
+ eligible.add(info);
+ }
+ }
+
+ if (eligible.size() == 0) {
+ return null;
+ }
+
+ Collections.sort(eligible, segmentByteSizeDescending);
+
+ if (verbose()) {
+ message("eligible=" + eligible);
+ }
+
+ int start = 0;
+ MergeSpecification spec = null;
+
+ while(start < eligible.size()) {
+ long totAfterMergeBytes = 0;
+ int upto = start;
+ boolean done = false;
+ while(upto < start + maxMergeAtOnceExplicit) {
+ if (upto == eligible.size()) {
+ done = true;
+ break;
+ }
+ final SegmentInfo info = eligible.get(upto);
+ final long segBytes = size(info);
+ if (totAfterMergeBytes + segBytes > maxMergedSegmentBytes) {
+ // TODO: we could be smarter here, eg cherry
+ // picking smaller merges that'd sum up to just
+ // around the max size
+ break;
+ }
+ totAfterMergeBytes += segBytes;
+ upto++;
+ }
+
+ if (upto == start) {
+ // Single segment is too big; grace it
+ start++;
+ continue;
+ }
+
+ if (spec == null) {
+ spec = new MergeSpecification();
+ }
+
+ final OneMerge merge = new OneMerge(eligible.range(start, upto));
+ if (verbose()) {
+ message("add merge=" + writer.get().segString(merge.segments));
+ }
+ spec.add(merge);
+ start = upto;
+ if (done) {
+ break;
+ }
+ }
+
+ return spec;
+ }
+
+ @Override
+ public boolean useCompoundFile(SegmentInfos infos, SegmentInfo mergedInfo) throws IOException {
+ final boolean doCFS;
+
+ if (!useCompoundFile) {
+ doCFS = false;
+ } else if (noCFSRatio == 1.0) {
+ doCFS = true;
+ } else {
+ long totalSize = 0;
+ for (SegmentInfo info : infos)
+ totalSize += size(info);
+
+ doCFS = size(mergedInfo) <= noCFSRatio * totalSize;
+ }
+ return doCFS;
+ }
+
+ @Override
+ public void close() {
+ }
+
+ private boolean isOptimized(SegmentInfo info)
+ throws IOException {
+ IndexWriter w = writer.get();
+ assert w != null;
+ boolean hasDeletions = w.numDeletedDocs(info) > 0;
+ return !hasDeletions &&
+ !info.hasSeparateNorms() &&
+ info.dir == w.getDirectory() &&
+ (info.getUseCompoundFile() == useCompoundFile || noCFSRatio < 1.0);
+ }
+
+ // Segment size in bytes, pro-rated by % deleted
+ private long size(SegmentInfo info) throws IOException {
+ final long byteSize = info.sizeInBytes(true);
+ final int delCount = writer.get().numDeletedDocs(info);
+ final double delRatio = (info.docCount <= 0 ? 0.0f : ((double)delCount / (double)info.docCount));
+ assert delRatio <= 1.0;
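+ // e.g. (illustrative): a 100 MB segment with 25% of its docs
+ // deleted is treated as 75 MB for merge selection.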
+ return (long) (byteSize * (1.0-delRatio));
+ }
+
+ private long floorSize(long bytes) {
+ return Math.max(floorSegmentBytes, bytes);
+ }
+
+ private boolean verbose() {
+ IndexWriter w = writer.get();
+ return w != null && w.verbose();
+ }
+
+ private void message(String message) {
+ if (verbose()) {
+ writer.get().message("TMP: " + message);
+ }
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder("[" + getClass().getSimpleName() + ": ");
+ sb.append("maxMergeAtOnce=").append(maxMergeAtOnce).append(", ");
+ sb.append("maxMergeAtOnceExplicit=").append(maxMergeAtOnceExplicit).append(", ");
+ sb.append("maxMergedSegmentMB=").append(maxMergedSegmentBytes/1024/1024.).append(", ");
+ sb.append("floorSegmentMB=").append(floorSegmentBytes/1024/1024.).append(", ");
+ sb.append("expungeDeletesPctAllowed=").append(expungeDeletesPctAllowed).append(", ");
+ sb.append("segmentsPerTier=").append(segsPerTier).append(", ");
+ sb.append("useCompoundFile=").append(useCompoundFile).append(", ");
+ sb.append("noCFSRatio=").append(noCFSRatio);
+ return sb.toString();
+ }
+}
\ No newline at end of file
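
A usage sketch of the new policy (the values shown are the documented
defaults; the analyzer is illustrative only):

    import org.apache.lucene.analysis.WhitespaceAnalyzer;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.index.TieredMergePolicy;
    import org.apache.lucene.util.Version;

    // Each setter returns the policy, so configuration calls chain.
    TieredMergePolicy tmp = new TieredMergePolicy()
        .setMaxMergeAtOnce(10)          // normal merging width
        .setMaxMergeAtOnceExplicit(30)  // optimize/expungeDeletes width
        .setSegmentsPerTier(10.0)       // allowed segments per tier
        .setMaxMergedSegmentMB(5*1024)  // ~5 GB cap for normal merges
        .setFloorSegmentMB(2.0);        // treat tiny segments as 2 MB
    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_CURRENT,
        new WhitespaceAnalyzer(Version.LUCENE_CURRENT));
    conf.setMergePolicy(tmp);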
Index: lucene/src/test-framework/org/apache/lucene/util/LineFileDocs.java
--- lucene/src/test-framework/org/apache/lucene/util/LineFileDocs.java Thu Feb 10 05:03:34 2011 -0500
+++ lucene/src/test-framework/org/apache/lucene/util/LineFileDocs.java Fri Feb 11 10:33:36 2011 -0500
@@ -128,7 +128,7 @@
body = new Field("body", "", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
doc.add(body);
- id = new Field("id", "", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
+ id = new Field("docid", "", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
doc.add(id);
date = new Field("date", "", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
Index: lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java
--- lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java Thu Feb 10 05:03:34 2011 -0500
+++ lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java Fri Feb 11 10:33:36 2011 -0500
@@ -763,9 +763,11 @@
}
if (r.nextBoolean()) {
+ c.setMergePolicy(newTieredMergePolicy());
+ } else if (r.nextBoolean()) {
+ c.setMergePolicy(newLogMergePolicy());
+ } else {
c.setMergePolicy(new MockRandomMergePolicy(r));
- } else {
- c.setMergePolicy(newLogMergePolicy());
}
c.setReaderPooling(r.nextBoolean());
@@ -777,6 +779,10 @@
return newLogMergePolicy(random);
}
+ public static TieredMergePolicy newTieredMergePolicy() {
+ return newTieredMergePolicy(random);
+ }
+
public static LogMergePolicy newLogMergePolicy(Random r) {
LogMergePolicy logmp = r.nextBoolean() ? new LogDocMergePolicy() : new LogByteSizeMergePolicy();
logmp.setUseCompoundFile(r.nextBoolean());
@@ -789,17 +795,22 @@
return logmp;
}
- public static LogMergePolicy newInOrderLogMergePolicy() {
- LogMergePolicy logmp = newLogMergePolicy();
- logmp.setRequireContiguousMerge(true);
- return logmp;
- }
-
- public static LogMergePolicy newInOrderLogMergePolicy(int mergeFactor) {
- LogMergePolicy logmp = newLogMergePolicy();
- logmp.setMergeFactor(mergeFactor);
- logmp.setRequireContiguousMerge(true);
- return logmp;
+ public static TieredMergePolicy newTieredMergePolicy(Random r) {
+ TieredMergePolicy tmp = new TieredMergePolicy();
+ if (r.nextInt(3) == 2) {
+ tmp.setMaxMergeAtOnce(2);
+ tmp.setMaxMergeAtOnceExplicit(2);
+ } else {
+ tmp.setMaxMergeAtOnce(_TestUtil.nextInt(r, 2, 20));
+ tmp.setMaxMergeAtOnceExplicit(_TestUtil.nextInt(r, 2, 30));
+ }
+ tmp.setMaxMergedSegmentMB(0.2 + r.nextDouble() * 2.0);
+ tmp.setFloorSegmentMB(0.2 + r.nextDouble() * 2.0);
+ tmp.setExpungeDeletesPctAllowed(0.0 + r.nextDouble() * 30.0);
+ tmp.setSegmentsPerTier(_TestUtil.nextInt(r, 2, 20));
+ tmp.setUseCompoundFile(r.nextBoolean());
+ tmp.setNoCFSRatio(0.1 + r.nextDouble()*0.8);
+ return tmp;
}
public static LogMergePolicy newLogMergePolicy(boolean useCFS) {
Index: lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java
--- lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java Thu Feb 10 05:03:34 2011 -0500
+++ lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java Fri Feb 11 10:33:36 2011 -0500
@@ -38,10 +38,13 @@
import org.apache.lucene.index.ConcurrentMergeScheduler;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.LogMergePolicy;
+import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.MergeScheduler;
+import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.index.codecs.Codec;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.store.Directory;
+import org.junit.Assert;
public class _TestUtil {
@@ -283,9 +286,14 @@
// count lowish
public static void reduceOpenFiles(IndexWriter w) {
// keep number of open files lowish
- LogMergePolicy lmp = (LogMergePolicy) w.getConfig().getMergePolicy();
- lmp.setMergeFactor(Math.min(5, lmp.getMergeFactor()));
-
+ MergePolicy mp = w.getConfig().getMergePolicy();
+ if (mp instanceof LogMergePolicy) {
+ LogMergePolicy lmp = (LogMergePolicy) mp;
+ lmp.setMergeFactor(Math.min(5, lmp.getMergeFactor()));
+ } else if (mp instanceof TieredMergePolicy) {
+ TieredMergePolicy tmp = (TieredMergePolicy) mp;
+ tmp.setMaxMergeAtOnce(Math.min(5, tmp.getMaxMergeAtOnce()));
+ }
MergeScheduler ms = w.getConfig().getMergeScheduler();
if (ms instanceof ConcurrentMergeScheduler) {
((ConcurrentMergeScheduler) ms).setMaxThreadCount(2);
Index: lucene/src/test/org/apache/lucene/index/TestAddIndexes.java
--- lucene/src/test/org/apache/lucene/index/TestAddIndexes.java Thu Feb 10 05:03:34 2011 -0500
+++ lucene/src/test/org/apache/lucene/index/TestAddIndexes.java Fri Feb 11 10:33:36 2011 -0500
@@ -1039,8 +1039,9 @@
IndexReader[] readers = new IndexReader[] { IndexReader.open(dirs[0]), IndexReader.open(dirs[1]) };
Directory dir = new RAMDirectory();
- IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer());
+ IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newLogMergePolicy());
LogMergePolicy lmp = (LogMergePolicy) conf.getMergePolicy();
+ lmp.setUseCompoundFile(true);
lmp.setNoCFSRatio(1.0); // Force creation of CFS
IndexWriter w3 = new IndexWriter(dir, conf);
w3.addIndexes(readers);
Index: lucene/src/test/org/apache/lucene/index/TestAtomicUpdate.java
--- lucene/src/test/org/apache/lucene/index/TestAtomicUpdate.java Thu Feb 10 05:03:34 2011 -0500
+++ lucene/src/test/org/apache/lucene/index/TestAtomicUpdate.java Fri Feb 11 10:33:36 2011 -0500
@@ -129,7 +129,7 @@
IndexWriterConfig conf = new IndexWriterConfig(
TEST_VERSION_CURRENT, new MockAnalyzer())
.setMaxBufferedDocs(7);
- ((LogMergePolicy) conf.getMergePolicy()).setMergeFactor(3);
+ ((TieredMergePolicy) conf.getMergePolicy()).setMaxMergeAtOnce(3);
IndexWriter writer = new MockIndexWriter(directory, conf);
writer.setInfoStream(VERBOSE ? System.out : null);
Index: lucene/src/test/org/apache/lucene/index/TestDeletionPolicy.java
--- lucene/src/test/org/apache/lucene/index/TestDeletionPolicy.java Thu Feb 10 05:03:34 2011 -0500
+++ lucene/src/test/org/apache/lucene/index/TestDeletionPolicy.java Fri Feb 11 10:33:36 2011 -0500
@@ -619,7 +619,7 @@
Directory dir = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(
TEST_VERSION_CURRENT, new MockAnalyzer())
- .setOpenMode(OpenMode.CREATE).setIndexDeletionPolicy(policy).setMergePolicy(newInOrderLogMergePolicy());
+ .setOpenMode(OpenMode.CREATE).setIndexDeletionPolicy(policy).setMergePolicy(newLogMergePolicy());
MergePolicy mp = conf.getMergePolicy();
if (mp instanceof LogMergePolicy) {
((LogMergePolicy) mp).setUseCompoundFile(useCompoundFile);
Index: lucene/src/test/org/apache/lucene/index/TestDocsAndPositions.java
--- lucene/src/test/org/apache/lucene/index/TestDocsAndPositions.java Thu Feb 10 05:03:34 2011 -0500
+++ lucene/src/test/org/apache/lucene/index/TestDocsAndPositions.java Fri Feb 11 10:33:36 2011 -0500
@@ -116,7 +116,7 @@
Directory dir = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random, dir,
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(
- MockTokenizer.WHITESPACE, true, usePayload)).setMergePolicy(newInOrderLogMergePolicy()));
+ MockTokenizer.WHITESPACE, true, usePayload)).setMergePolicy(newLogMergePolicy()));
int numDocs = 131;
int max = 1051;
int term = random.nextInt(max);
@@ -197,7 +197,7 @@
Directory dir = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random, dir,
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(
- MockTokenizer.WHITESPACE, true, usePayload)).setMergePolicy(newInOrderLogMergePolicy()));
+ MockTokenizer.WHITESPACE, true, usePayload)).setMergePolicy(newLogMergePolicy()));
int numDocs = 499;
int max = 15678;
int term = random.nextInt(max);
Index: lucene/src/test/org/apache/lucene/index/TestIndexReader.java
--- lucene/src/test/org/apache/lucene/index/TestIndexReader.java Thu Feb 10 05:03:34 2011 -0500
+++ lucene/src/test/org/apache/lucene/index/TestIndexReader.java Fri Feb 11 10:33:36 2011 -0500
@@ -371,7 +371,7 @@
Directory dir = newDirectory();
byte[] bin = new byte[]{0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
- IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy()));
+ IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newLogMergePolicy()));
for (int i = 0; i < 10; i++) {
addDoc(writer, "document number " + (i + 1));
@@ -380,7 +380,7 @@
addDocumentWithTermVectorFields(writer);
}
writer.close();
- writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.APPEND).setMergePolicy(newInOrderLogMergePolicy()));
+ writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.APPEND).setMergePolicy(newLogMergePolicy()));
Document doc = new Document();
doc.add(new Field("bin1", bin));
doc.add(new Field("junk", "junk text", Field.Store.NO, Field.Index.ANALYZED));
@@ -417,7 +417,7 @@
// force optimize
- writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.APPEND).setMergePolicy(newInOrderLogMergePolicy()));
+ writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.APPEND).setMergePolicy(newLogMergePolicy()));
writer.optimize();
writer.close();
reader = IndexReader.open(dir, false);
@@ -1163,7 +1163,7 @@
public void testMultiReaderDeletes() throws Exception {
Directory dir = newDirectory();
- RandomIndexWriter w= new RandomIndexWriter(random, dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy()));
+ RandomIndexWriter w= new RandomIndexWriter(random, dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newLogMergePolicy()));
Document doc = new Document();
doc.add(newField("f", "doctor", Field.Store.NO, Field.Index.NOT_ANALYZED));
w.addDocument(doc);
Index: lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java
--- lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java Thu Feb 10 05:03:34 2011 -0500
+++ lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java Fri Feb 11 10:33:36 2011 -0500
@@ -174,7 +174,7 @@
private void doTestReopenWithCommit (Random random, Directory dir, boolean withReopen) throws IOException {
IndexWriter iwriter = new IndexWriter(dir, newIndexWriterConfig(
TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(
- OpenMode.CREATE).setMergeScheduler(new SerialMergeScheduler()).setMergePolicy(newInOrderLogMergePolicy()));
+ OpenMode.CREATE).setMergeScheduler(new SerialMergeScheduler()).setMergePolicy(newLogMergePolicy()));
iwriter.commit();
IndexReader reader = IndexReader.open(dir, false);
try {
Index: lucene/src/test/org/apache/lucene/index/TestIndexWriterConfig.java
--- lucene/src/test/org/apache/lucene/index/TestIndexWriterConfig.java Thu Feb 10 05:03:34 2011 -0500
+++ lucene/src/test/org/apache/lucene/index/TestIndexWriterConfig.java Fri Feb 11 10:33:36 2011 -0500
@@ -68,7 +68,7 @@
assertNull(conf.getMergedSegmentWarmer());
assertEquals(IndexWriterConfig.DEFAULT_MAX_THREAD_STATES, conf.getMaxThreadStates());
assertEquals(IndexWriterConfig.DEFAULT_READER_TERMS_INDEX_DIVISOR, conf.getReaderTermsIndexDivisor());
- assertEquals(LogByteSizeMergePolicy.class, conf.getMergePolicy().getClass());
+ assertEquals(TieredMergePolicy.class, conf.getMergePolicy().getClass());
// Sanity check - validate that all getters are covered.
Set<String> getters = new HashSet<String>();
@@ -246,7 +246,7 @@
assertEquals(IndexWriterConfig.DEFAULT_MAX_THREAD_STATES, conf.getMaxThreadStates());
// Test MergePolicy
- assertEquals(LogByteSizeMergePolicy.class, conf.getMergePolicy().getClass());
+ assertEquals(TieredMergePolicy.class, conf.getMergePolicy().getClass());
conf.setMergePolicy(new LogDocMergePolicy());
assertEquals(LogDocMergePolicy.class, conf.getMergePolicy().getClass());
conf.setMergePolicy(null);
Index: lucene/src/test/org/apache/lucene/index/TestIndexWriterMergePolicy.java
--- lucene/src/test/org/apache/lucene/index/TestIndexWriterMergePolicy.java Thu Feb 10 05:03:34 2011 -0500
+++ lucene/src/test/org/apache/lucene/index/TestIndexWriterMergePolicy.java Fri Feb 11 10:33:36 2011 -0500
@@ -104,7 +104,7 @@
dir,
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).
setMaxBufferedDocs(10).
- setMergePolicy(newInOrderLogMergePolicy())
+ setMergePolicy(newLogMergePolicy())
);
for (int i = 0; i < 250; i++) {
Index: lucene/src/test/org/apache/lucene/index/TestIndexWriterMerging.java
--- lucene/src/test/org/apache/lucene/index/TestIndexWriterMerging.java Thu Feb 10 05:03:34 2011 -0500
+++ lucene/src/test/org/apache/lucene/index/TestIndexWriterMerging.java Fri Feb 11 10:33:36 2011 -0500
@@ -58,7 +58,7 @@
IndexWriter writer = new IndexWriter(
merged,
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).
- setMergePolicy(newInOrderLogMergePolicy(2))
+ setMergePolicy(newLogMergePolicy(2))
);
writer.setInfoStream(VERBOSE ? System.out : null);
writer.addIndexes(indexA, indexB);
@@ -101,7 +101,7 @@
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).
setOpenMode(OpenMode.CREATE).
setMaxBufferedDocs(2).
- setMergePolicy(newInOrderLogMergePolicy(2))
+ setMergePolicy(newLogMergePolicy(2))
);
for (int i = start; i < (start + numDocs); i++)
Index: lucene/src/test/org/apache/lucene/index/TestIndexWriterReader.java
--- lucene/src/test/org/apache/lucene/index/TestIndexWriterReader.java Thu Feb 10 05:03:34 2011 -0500
+++ lucene/src/test/org/apache/lucene/index/TestIndexWriterReader.java Fri Feb 11 10:33:36 2011 -0500
@@ -46,7 +46,7 @@
import java.util.concurrent.atomic.AtomicInteger;
public class TestIndexWriterReader extends LuceneTestCase {
- static PrintStream infoStream;
+ static PrintStream infoStream = VERBOSE ? System.out : null;
public static int count(Term t, IndexReader r) throws IOException {
int count = 0;
Index: lucene/src/test/org/apache/lucene/index/TestMaxTermFrequency.java
--- lucene/src/test/org/apache/lucene/index/TestMaxTermFrequency.java Thu Feb 10 05:03:34 2011 -0500
+++ lucene/src/test/org/apache/lucene/index/TestMaxTermFrequency.java Fri Feb 11 10:33:36 2011 -0500
@@ -45,7 +45,7 @@
super.setUp();
dir = newDirectory();
IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT,
- new MockAnalyzer(MockTokenizer.SIMPLE, true)).setMergePolicy(newInOrderLogMergePolicy());
+ new MockAnalyzer(MockTokenizer.SIMPLE, true)).setMergePolicy(newLogMergePolicy());
config.setSimilarityProvider(new TestSimilarity());
RandomIndexWriter writer = new RandomIndexWriter(random, dir, config);
Document doc = new Document();
Index: lucene/src/test/org/apache/lucene/index/TestMultiLevelSkipList.java
--- lucene/src/test/org/apache/lucene/index/TestMultiLevelSkipList.java Thu Feb 10 05:03:34 2011 -0500
+++ lucene/src/test/org/apache/lucene/index/TestMultiLevelSkipList.java Fri Feb 11 10:33:36 2011 -0500
@@ -71,7 +71,7 @@
public void testSimpleSkip() throws IOException {
Directory dir = new CountingRAMDirectory(new RAMDirectory());
- IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new PayloadAnalyzer()).setCodecProvider(_TestUtil.alwaysCodec("Standard")).setMergePolicy(newInOrderLogMergePolicy()));
+ IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new PayloadAnalyzer()).setCodecProvider(_TestUtil.alwaysCodec("Standard")).setMergePolicy(newLogMergePolicy()));
Term term = new Term("test", "a");
for (int i = 0; i < 5000; i++) {
Document d1 = new Document();
Index: lucene/src/test/org/apache/lucene/index/TestNRTThreads.java
--- lucene/src/test/org/apache/lucene/index/TestNRTThreads.java Thu Feb 10 05:03:34 2011 -0500
+++ lucene/src/test/org/apache/lucene/index/TestNRTThreads.java Fri Feb 11 10:33:36 2011 -0500
@@ -102,18 +102,7 @@
if (VERBOSE) {
writer.setInfoStream(System.out);
}
- MergeScheduler ms = writer.getConfig().getMergeScheduler();
- if (ms instanceof ConcurrentMergeScheduler) {
- // try to keep max file open count down
- ((ConcurrentMergeScheduler) ms).setMaxThreadCount(1);
- ((ConcurrentMergeScheduler) ms).setMaxMergeCount(1);
- }
- /*
- LogMergePolicy lmp = (LogMergePolicy) writer.getConfig().getMergePolicy();
- if (lmp.getMergeFactor() > 5) {
- lmp.setMergeFactor(5);
- }
- */
+ _TestUtil.reduceOpenFiles(writer);
final int NUM_INDEX_THREADS = 2;
final int NUM_SEARCH_THREADS = 3;
@@ -147,36 +136,36 @@
}
if (random.nextBoolean()) {
if (VERBOSE) {
- System.out.println(Thread.currentThread().getName() + ": add doc id:" + doc.get("id"));
+ //System.out.println(Thread.currentThread().getName() + ": add doc id:" + doc.get("docid"));
}
writer.addDocument(doc);
} else {
// we use update but it never replaces a
// prior doc
if (VERBOSE) {
- System.out.println(Thread.currentThread().getName() + ": update doc id:" + doc.get("id"));
+ //System.out.println(Thread.currentThread().getName() + ": update doc id:" + doc.get("docid"));
}
- writer.updateDocument(new Term("id", doc.get("id")), doc);
+ writer.updateDocument(new Term("docid", doc.get("docid")), doc);
}
if (random.nextInt(5) == 3) {
if (VERBOSE) {
- System.out.println(Thread.currentThread().getName() + ": buffer del id:" + doc.get("id"));
+ //System.out.println(Thread.currentThread().getName() + ": buffer del id:" + doc.get("docid"));
}
- toDeleteIDs.add(doc.get("id"));
+ toDeleteIDs.add(doc.get("docid"));
}
if (random.nextInt(50) == 17) {
if (VERBOSE) {
- System.out.println(Thread.currentThread().getName() + ": apply " + toDeleteIDs.size() + " deletes");
+ //System.out.println(Thread.currentThread().getName() + ": apply " + toDeleteIDs.size() + " deletes");
}
for(String id : toDeleteIDs) {
if (VERBOSE) {
- System.out.println(Thread.currentThread().getName() + ": del term=id:" + id);
+ //System.out.println(Thread.currentThread().getName() + ": del term=id:" + id);
}
- writer.deleteDocuments(new Term("id", id));
+ writer.deleteDocuments(new Term("docid", id));
}
final int count = delCount.addAndGet(toDeleteIDs.size());
if (VERBOSE) {
- System.out.println(Thread.currentThread().getName() + ": tot " + count + " deletes");
+ //System.out.println(Thread.currentThread().getName() + ": tot " + count + " deletes");
}
delIDs.addAll(toDeleteIDs);
toDeleteIDs.clear();
@@ -357,18 +346,18 @@
final IndexSearcher s = newSearcher(r2);
boolean doFail = false;
for(String id : delIDs) {
- final TopDocs hits = s.search(new TermQuery(new Term("id", id)), 1);
+ final TopDocs hits = s.search(new TermQuery(new Term("docid", id)), 1);
if (hits.totalHits != 0) {
System.out.println("doc id=" + id + " is supposed to be deleted, but got docID=" + hits.scoreDocs[0].doc);
doFail = true;
}
}
- final int endID = Integer.parseInt(docs.nextDoc().get("id"));
+ final int endID = Integer.parseInt(docs.nextDoc().get("docid"));
for(int id=0;id<endID;id++) {
[...]
List<List<Document>> docs = new ArrayList<List<Document>>();
Document d = new Document();
Field f = newField("f", "", Field.Store.NO, Field.Index.ANALYZED);
Index: lucene/src/test/org/apache/lucene/search/TestSort.java
--- lucene/src/test/org/apache/lucene/search/TestSort.java Thu Feb 10 05:03:34 2011 -0500
+++ lucene/src/test/org/apache/lucene/search/TestSort.java Fri Feb 11 10:33:36 2011 -0500
@@ -120,7 +120,7 @@
throws IOException {
Directory indexStore = newDirectory();
dirs.add(indexStore);
- RandomIndexWriter writer = new RandomIndexWriter(random, indexStore, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy()));
+ RandomIndexWriter writer = new RandomIndexWriter(random, indexStore, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newLogMergePolicy()));
for (int i=0; i