Index: src/test/org/apache/lucene/TestSearch.java =================================================================== --- src/test/org/apache/lucene/TestSearch.java (revision 566282) +++ src/test/org/apache/lucene/TestSearch.java (working copy) @@ -79,7 +79,7 @@ Analyzer analyzer = new SimpleAnalyzer(); IndexWriter writer = new IndexWriter(directory, analyzer, true); - writer.setUseCompoundFile(useCompoundFile); + ((LogDocMergePolicy)writer.getMergePolicy()).setUseCompoundFile(useCompoundFile); String[] docs = { "a b c d e", Index: src/test/org/apache/lucene/IndexTest.java =================================================================== --- src/test/org/apache/lucene/IndexTest.java (revision 566282) +++ src/test/org/apache/lucene/IndexTest.java (working copy) @@ -19,6 +19,7 @@ import org.apache.lucene.analysis.SimpleAnalyzer; import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.LogDocMergePolicy; import org.apache.lucene.demo.FileDocument; import java.io.File; @@ -31,7 +32,7 @@ IndexWriter writer = new IndexWriter(File.createTempFile("luceneTest", "idx"), new SimpleAnalyzer(), true); - writer.setMergeFactor(20); + ((LogDocMergePolicy)writer.getMergePolicy()).setMergeFactor(20); indexDocs(writer, new File("/tmp")); Index: src/test/org/apache/lucene/store/TestBufferedIndexInput.java =================================================================== --- src/test/org/apache/lucene/store/TestBufferedIndexInput.java (revision 566282) +++ src/test/org/apache/lucene/store/TestBufferedIndexInput.java (working copy) @@ -26,6 +26,7 @@ import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; +import org.apache.lucene.index.LogDocMergePolicy; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.analysis.WhitespaceAnalyzer; import org.apache.lucene.document.Document; @@ -160,7 +161,7 @@ MockFSDirectory dir = new MockFSDirectory(indexDir); try { IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); - writer.setUseCompoundFile(false); + ((LogDocMergePolicy)writer.getMergePolicy()).setUseCompoundFile(false); for(int i=0;i<37;i++) { Document doc = new Document(); doc.add(new Field("content", "aaa bbb ccc ddd" + i, Field.Store.YES, Field.Index.TOKENIZED)); Index: src/test/org/apache/lucene/ThreadSafetyTest.java =================================================================== --- src/test/org/apache/lucene/ThreadSafetyTest.java (revision 566282) +++ src/test/org/apache/lucene/ThreadSafetyTest.java (working copy) @@ -62,7 +62,7 @@ // Switch between single and multiple file segments useCompoundFiles = Math.random() < 0.5; - writer.setUseCompoundFile(useCompoundFiles); + ((LogDocMergePolicy)writer.getMergePolicy()).setUseCompoundFile(useCompoundFiles); writer.addDocument(d); Index: src/test/org/apache/lucene/index/TestIndexWriterMergePolicy.java =================================================================== --- src/test/org/apache/lucene/index/TestIndexWriterMergePolicy.java (revision 566309) +++ src/test/org/apache/lucene/index/TestIndexWriterMergePolicy.java (working copy) @@ -35,7 +35,7 @@ IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); writer.setMaxBufferedDocs(10); - writer.setMergeFactor(10); + ((LogDocMergePolicy)writer.getMergePolicy()).setMergeFactor(10); for (int i = 0; i < 100; i++) { addDoc(writer); @@ -51,7 +51,7 @@ IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); writer.setMaxBufferedDocs(10); - 
writer.setMergeFactor(10); + ((LogDocMergePolicy)writer.getMergePolicy()).setMergeFactor(10); boolean noOverMerge = false; for (int i = 0; i < 100; i++) { @@ -72,7 +72,7 @@ IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); writer.setMaxBufferedDocs(10); - writer.setMergeFactor(10); + ((LogDocMergePolicy)writer.getMergePolicy()).setMergeFactor(10); for (int i = 0; i < 100; i++) { addDoc(writer); @@ -80,7 +80,7 @@ writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false); writer.setMaxBufferedDocs(10); - writer.setMergeFactor(10); + ((LogDocMergePolicy)writer.getMergePolicy()).setMergeFactor(10); checkInvariants(writer); } @@ -93,14 +93,14 @@ IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); writer.setMaxBufferedDocs(10); - writer.setMergeFactor(100); + ((LogDocMergePolicy)writer.getMergePolicy()).setMergeFactor(100); for (int i = 0; i < 250; i++) { addDoc(writer); checkInvariants(writer); } - writer.setMergeFactor(5); + ((LogDocMergePolicy)writer.getMergePolicy()).setMergeFactor(5); // merge policy only fixes segments on levels where merges // have been triggered, so check invariants after all adds @@ -118,7 +118,7 @@ IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); writer.setMaxBufferedDocs(101); - writer.setMergeFactor(101); + ((LogDocMergePolicy)writer.getMergePolicy()).setMergeFactor(101); // leftmost* segment has 1 doc // rightmost* segment has 100 docs @@ -131,11 +131,11 @@ writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false); writer.setMaxBufferedDocs(101); - writer.setMergeFactor(101); + ((LogDocMergePolicy)writer.getMergePolicy()).setMergeFactor(101); } writer.setMaxBufferedDocs(10); - writer.setMergeFactor(10); + ((LogDocMergePolicy)writer.getMergePolicy()).setMergeFactor(10); // merge policy only fixes segments on levels where merges // have been triggered, so check invariants after all adds @@ -158,7 +158,7 @@ IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); writer.setMaxBufferedDocs(10); - writer.setMergeFactor(100); + ((LogDocMergePolicy)writer.getMergePolicy()).setMergeFactor(100); for (int i = 0; i < 250; i++) { addDoc(writer); @@ -172,7 +172,7 @@ writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false); writer.setMaxBufferedDocs(10); - writer.setMergeFactor(5); + ((LogDocMergePolicy)writer.getMergePolicy()).setMergeFactor(5); // merge factor is changed, so check invariants after all adds for (int i = 0; i < 10; i++) { @@ -192,36 +192,37 @@ private void checkInvariants(IndexWriter writer) throws IOException { int maxBufferedDocs = writer.getMaxBufferedDocs(); - int mergeFactor = writer.getMergeFactor(); - int maxMergeDocs = writer.getMaxMergeDocs(); + int mergeFactor = ((LogDocMergePolicy)writer.getMergePolicy()).getMergeFactor(); + int maxMergeDocs = ((LogDocMergePolicy)writer.getMergePolicy()).getMaxMergeDocs(); int ramSegmentCount = writer.getNumBufferedDocuments(); assertTrue(ramSegmentCount < maxBufferedDocs); - int lowerBound = -1; - int upperBound = maxBufferedDocs; + int previousBound = -1; + int currentBound = maxBufferedDocs; int numSegments = 0; int segmentCount = writer.getSegmentCount(); for (int i = segmentCount - 1; i >= 0; i--) { int docCount = writer.getDocCount(i); - assertTrue(docCount > lowerBound); - if (docCount <= upperBound) { + assertTrue(docCount > previousBound); + + if (docCount <= currentBound) { numSegments++; } else { - if (upperBound * mergeFactor <= maxMergeDocs) { + if (currentBound * mergeFactor <= maxMergeDocs) { 
assertTrue(numSegments < mergeFactor); } do { - lowerBound = upperBound; - upperBound *= mergeFactor; - } while (docCount > upperBound); + previousBound = currentBound; + currentBound *= mergeFactor; + } while (docCount > currentBound); numSegments = 1; } } - if (upperBound * mergeFactor <= maxMergeDocs) { + if (currentBound * mergeFactor <= maxMergeDocs) { assertTrue(numSegments < mergeFactor); } Index: src/test/org/apache/lucene/index/DocHelper.java =================================================================== --- src/test/org/apache/lucene/index/DocHelper.java (revision 566309) +++ src/test/org/apache/lucene/index/DocHelper.java (working copy) @@ -236,7 +236,7 @@ //writer.setUseCompoundFile(false); writer.addDocument(doc); writer.flush(); - SegmentInfo info = writer.segmentInfos.info(writer.segmentInfos.size()-1); + SegmentInfo info = writer.newestSegment(); writer.close(); return info; } Index: src/test/org/apache/lucene/index/TestDoc.java =================================================================== --- src/test/org/apache/lucene/index/TestDoc.java (revision 566309) +++ src/test/org/apache/lucene/index/TestDoc.java (working copy) @@ -168,7 +168,7 @@ Document doc = FileDocument.Document(file); writer.addDocument(doc); writer.flush(); - return writer.segmentInfos.info(writer.segmentInfos.size()-1); + return writer.newestSegment(); } Index: src/test/org/apache/lucene/index/TestLogDocMergePolicy.java =================================================================== --- src/test/org/apache/lucene/index/TestLogDocMergePolicy.java (revision 0) +++ src/test/org/apache/lucene/index/TestLogDocMergePolicy.java (revision 0) @@ -0,0 +1,214 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; + +import junit.framework.TestCase; + +public class TestLogDocMergePolicy extends TestCase { + + void assertNotEquals(Object a, Object b) { + assertFalse(a.equals(b)); + } + + MockIndexMerger merger; + LogDocMergePolicy policy; + + void set(String sizes) { + merger = new MockIndexMerger (sizes); + policy = new LogDocMergePolicy(); + policy.setMaxMergeDocs(100000); + } + + void merge(String before, String after, int mergeFactor, int minMergeDocs) + throws CorruptIndexException, IOException { + set(before); + merger.setMaxBufferedDocs(minMergeDocs); + policy.setMergeFactor(mergeFactor); + policy.merge(merger.segmentInfos, merger); + MockIndexMerger other = new MockIndexMerger(after); + assertEquals(other.toString(), merger.toString()); + } + + void merge(String before, String after, int mergeFactor) + throws CorruptIndexException, IOException { + merge(before, after, mergeFactor, MockIndexMerger.DEFAULT_MAX_BUFFERED_DOCS); + + } + + void merge(String before, String after) + throws CorruptIndexException, IOException { + merge(before, + after, + LogDocMergePolicy.DEFAULT_MERGE_FACTOR, + MockIndexMerger.DEFAULT_MAX_BUFFERED_DOCS); + } + + public void testOneMerge() + throws CorruptIndexException, IOException { + merge("1000 10 0 10", "1000 10 10", 4, 4); + } + + public void testConsistentMerge() + throws CorruptIndexException, IOException { + + MockIndexMerger other; + + merge("0 10", "0 10"); + merge("1000 10 0 10", "1000 10 10", 4, 4); + merge("1000 0 0 10 0 0 10", "1000 10 0 10", 4); + merge("1000 100 10 1 1 1 1 1 1 1 1 1 1 1 1 1 1", "1000 100 19 1 1 1 1 1"); + merge("1000 100 10 0 0 0 0 0 0 0 0 0 0 0 0 0 0", "1000 100 10 0 0 0 0 0"); + merge("9 9 9 9 9 9 9 9 9 9","90"); + merge("10 10 10 10 10 10 10 10 10 10", "100"); + merge("100 100 100 100 100 100 100 100 100 100", "1000"); + merge("1000 10 x10 x10 x10", "1000 10 10 10 10"); + merge("1000 9 x9 x9 x9", "1000 9 9 9 9"); + merge("1000 10 x10 x10 x10", "1000 40", 4); + merge("1000 9 x9 x9 x9", "1000 36", 4); + merge("1000 0 10 0 10", "1000 20", 4); + + } + + public void testInconsistentMerge() + throws CorruptIndexException, IOException { + + merge("1001 101 11 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1001", "1120 1008"); + merge("1001 101 11 1 1 1 1 1 1 1 1 1 1 1 1 1 1 11", "1001 101 20 16"); + + } + + public void testChangeMergeFactor() + throws CorruptIndexException, IOException { + + MockIndexMerger other; + + set("11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11"); + policy.setMergeFactor(5); + policy.merge(merger.segmentInfos, merger); + other = new MockIndexMerger("275 11"); + assertEquals(other.toString(), merger.toString()); + + set("0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 11"); + policy.setMergeFactor(5); + policy.merge(merger.segmentInfos, merger); + other = new MockIndexMerger("11"); + assertEquals(other.toString(), merger.toString()); + + set("0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 10"); + policy.setMergeFactor(5); + policy.merge(merger.segmentInfos, merger); + other = new MockIndexMerger("0 10"); + assertEquals(other.toString(), merger.toString()); + } + + public void testOptimize() + throws CorruptIndexException, IOException { + + MockIndexMerger other; + + set ("1000 100 10 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1000"); + + other = new MockIndexMerger("2124"); + policy.optimize(merger.segmentInfos, merger); + assertEquals(other.toString(), merger.toString()); + + set ("x1000"); + + other = new MockIndexMerger("1000"); + policy.optimize(merger.segmentInfos, merger); 
+ assertEquals(other.toString(), merger.toString()); + + set ("1000 100 10 1"); + + other = new MockIndexMerger("1111"); + policy.optimize(merger.segmentInfos, merger); + assertEquals(other.toString(), merger.toString()); + + } + + public void testDirs() + throws CorruptIndexException, IOException { + + MockIndexMerger other; + + set ("100 10 1"); + + other = new MockIndexMerger("100 10 1"); + assertEquals(other.toString(), merger.toString()); + + other = new MockIndexMerger("100 x10 1"); + assertNotEquals(other.toString(), merger.toString()); + + set ("100 x10 1"); + policy.merge(merger.segmentInfos, merger); + + other = new MockIndexMerger("100 10 1"); + assertEquals(other.toString(), merger.toString()); + + } + + public void testInconsistent() { + + set("1001 1000 1000 101 100 11 1"); + assertTrue(policy.isConsistent(merger.segmentInfos, merger)); + + set("1001 1000 1001 1000 101 11 1"); + assertFalse(policy.isConsistent(merger.segmentInfos, merger)); + + set("1001 1000 1001 101 1000 101 11 1"); + assertFalse(policy.isConsistent(merger.segmentInfos, merger)); + + set("1001 11 101 1"); + assertFalse(policy.isConsistent(merger.segmentInfos, merger)); + + set("1001 101 11"); + assertTrue(policy.isConsistent(merger.segmentInfos, merger)); + + set("100001 10001 1001 101 11"); + try { + policy.isConsistent(merger.segmentInfos, merger); + fail(); + } catch (IllegalArgumentException e) { + } + + } + + public void testLowestConsistentBound() { + + set("101 101 1001 101 1"); + assertTrue(policy.lowestConsistentBound(merger.segmentInfos, merger) == + 10000); + + set("10000 1000 100 10 1 100 10 1"); + // System.err.println("? " + policy.lowestConsistentBound(merger.segmentInfos)); + assertTrue(policy.lowestConsistentBound(merger.segmentInfos, merger) == + 100); + + set("1000 1000 100 10 1 1000"); + assertTrue(policy.lowestConsistentBound(merger.segmentInfos, merger) == + 1000); + + set("1000 100 10 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1000"); + assertTrue(policy.lowestConsistentBound(merger.segmentInfos, merger) == + 1000); + + } + +} Property changes on: src/test/org/apache/lucene/index/TestLogDocMergePolicy.java ___________________________________________________________________ Name: svn:eol-style + native Index: src/test/org/apache/lucene/index/MockIndexMerger.java =================================================================== --- src/test/org/apache/lucene/index/MockIndexMerger.java (revision 0) +++ src/test/org/apache/lucene/index/MockIndexMerger.java (revision 0) @@ -0,0 +1,264 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; + +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.MockRAMDirectory; + +import java.util.Map; +import java.util.HashMap; +import java.io.IOException; + +class MockIndexMerger implements IndexMerger { + + MergePolicy policy; + + public final static int DEFAULT_MAX_BUFFERED_DOCS = 10; + + private int maxBufferedDocs = DEFAULT_MAX_BUFFERED_DOCS; + + public void setMaxBufferedDocs(int maxBufferedDocs) { + this.maxBufferedDocs = maxBufferedDocs; + } + + public int getMaxBufferedDocs() { + return maxBufferedDocs; + } + + private double ramBufferSizeMB = 0; + + public void setRAMBufferSizeMB(double ramBufferSizeMB) { + this.ramBufferSizeMB = ramBufferSizeMB; + } + + public double getRAMBufferSizeMB() { + return ramBufferSizeMB; + } + + void setMergePolicy(MergePolicy policy) { + this.policy = policy; + } + + public synchronized void optimize() + throws CorruptIndexException, IOException { + policy.optimize(segmentInfos, this); + } + + public synchronized void merge() + throws CorruptIndexException, IOException { + policy.merge(segmentInfos, this); + } + + MockIndexMerger(String string) { + + String[] strings = string.split("\\s"); + + segmentInfos = new SegmentInfos(); + + for(int i = 0; i < strings.length; i++) { + + StringBuffer buffer = new StringBuffer(strings[i]); + + boolean isCompoundFile = true; + boolean hasSingleNormsFile = true; + String dir = ""; + + while (buffer.charAt(0) < '0' || buffer.charAt(0) > '9') { + + char c = buffer.charAt(0); + buffer.deleteCharAt(0); + + switch(c) { + case 'c': isCompoundFile = true; break; + case 'C': isCompoundFile = false; break; + case 'x': dir = "x"; break; + case 'y': dir = "y"; break; + case 'z': dir = "z"; break; + } + + } + + int size = Integer.parseInt(buffer.toString()); + + SegmentInfo info = new SegmentInfo("name" + i, + size, + the(dir)); + info.setUseCompoundFile(isCompoundFile); + segmentInfos.addElement(info); + + } + } + + public void add(MockIndexMerger other) { + add(copy(other.segmentInfos)); + } + + protected void merge(SegmentInfo segment) { + } + + void add(SegmentInfos segments) { + synchronized(segmentInfos) { + segmentInfos.addAll(segments); + } + } + + static SegmentInfos copy(SegmentInfos segmentInfos) { + SegmentInfos segments = new SegmentInfos(); + synchronized(segmentInfos) { + segments.addAll(segmentInfos); + } + return segments; + } + + void replace(MergePolicy.MergeSpecification spec, SegmentInfo info) { + synchronized(segmentInfos) { + + SegmentInfos segments = spec.segments; + + for(int i = 0; i < segments.size(); i++) { + + int index = segmentInfos.indexOf(segments.info(i)); + + if (index < 0) { + throw new RuntimeException("could not replace segmentInfo"); + } + + if (i == 0) { + segmentInfos.set(index, info); + } else { + segmentInfos.removeElementAt(index); + } + + } + + } + } + + public int merge(MergePolicy.MergeSpecification spec) + throws CorruptIndexException, IOException { + + SegmentInfo info = new SegmentInfo("name", 0, directory); + info.setUseCompoundFile(spec.useCompoundFile); + + SegmentInfos segments = copy(spec.segments); + + int docCount = 0; + + for(int i = 0; i < segments.size(); i++) { + merge(segments.info(i)); + docCount += segments.info(i).docCount; + } + + info.docCount = docCount; + + replace(spec, info); + + return docCount; + } + + public Directory getDirectory() { + return directory; + } + + SegmentInfos segmentInfos; + Directory directory = the(); + + public String segString(SegmentInfos segments, + int first, + int beyond) { 
+ + segments = copy(segments); + + StringBuffer buffer = new StringBuffer(); + + for(int i = first; i < beyond; i++) { + + if (i > first) { + buffer.append(' '); + } + + buffer.append('['); + buffer.append(i); + buffer.append(']'); + + SegmentInfo info = segments.info(i); + + try { + if (info.getUseCompoundFile()) { + buffer.append('c'); + } else { + buffer.append('C'); + } + } catch (Exception e) { + } + + if (info.dir != getDirectory()) { + buffer.append('x'); + } + + buffer.append(info.docCount); + + } + + return buffer.toString(); + + } + + public String segString(SegmentInfos segments) { + synchronized(segments) { + return segString(segments, 0, segments.size()); + } + } + + public String segString(MergePolicy.MergeSpecification spec) { + return segString(spec.segments); + } + + public String toString() { + return segString(segmentInfos); + } + + // utilities for managing a bunch of dirs + + static Map instances = new HashMap(); + + String name; + + static MockRAMDirectory the(String name) { + try { + if (!instances.containsKey(name)) { + instances.put(name, new MockRAMDirectory()); + } + return (MockRAMDirectory)instances.get(name); + } catch (IOException ioe) { + return null; + } + } + + static MockRAMDirectory the() { + return the(""); + } + + public String[] _list() { + return new String[0]; + } + +} + Property changes on: src/test/org/apache/lucene/index/MockIndexMerger.java ___________________________________________________________________ Name: svn:eol-style + native Index: src/test/org/apache/lucene/index/TestMockIndexMerger.java =================================================================== --- src/test/org/apache/lucene/index/TestMockIndexMerger.java (revision 0) +++ src/test/org/apache/lucene/index/TestMockIndexMerger.java (revision 0) @@ -0,0 +1,89 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; + +import junit.framework.TestCase; + +public class TestMockIndexMerger extends TestCase { + + void assertNotEquals(Object a, Object b) { + assertFalse(a.equals(b)); + } + + public void testMockIndexMerger() { + + assertEquals(new MockIndexMerger("1001 101 11 1").toString(), + new MockIndexMerger("1001 101 11 1").toString()); + + assertNotEquals(new MockIndexMerger("1001 101 11 1").toString(), + new MockIndexMerger("1001 101 11 11").toString()); + + } + + SegmentInfos segmentInfos(SegmentInfos all, int first, int last) { + SegmentInfos subset = new SegmentInfos(); + subset.addAll(all.subList(first, last)); + return subset; + } + + public void testUseCompoundFile() throws IOException { + + MockIndexMerger merger = new MockIndexMerger("c1001 C101"); + + assertTrue(merger.segmentInfos.info(0).getUseCompoundFile()); + assertFalse(merger.segmentInfos.info(1).getUseCompoundFile()); + + MergePolicy.MergeSpecification spec = new MergePolicy.MergeSpecification(); + spec.segments = segmentInfos(merger.segmentInfos, 0, 1); + spec.useCompoundFile = true; + + merger.merge(spec); + + assertTrue(merger.segmentInfos.info(0).getUseCompoundFile()); + assertFalse(merger.segmentInfos.info(1).getUseCompoundFile()); + + spec.segments = segmentInfos(merger.segmentInfos, 0, 1); + spec.useCompoundFile = false; + + merger.merge(spec); + + assertFalse(merger.segmentInfos.info(0).getUseCompoundFile()); + assertFalse(merger.segmentInfos.info(1).getUseCompoundFile()); + + spec.segments = segmentInfos(merger.segmentInfos, 1, 2); + spec.useCompoundFile = false; + + merger.merge(spec); + + assertFalse(merger.segmentInfos.info(0).getUseCompoundFile()); + assertFalse(merger.segmentInfos.info(1).getUseCompoundFile()); + + spec.segments = segmentInfos(merger.segmentInfos, 1, 2); + spec.useCompoundFile = true; + + merger.merge(spec); + + assertFalse(merger.segmentInfos.info(0).getUseCompoundFile()); + assertTrue(merger.segmentInfos.info(1).getUseCompoundFile()); + + + } + +} Property changes on: src/test/org/apache/lucene/index/TestMockIndexMerger.java ___________________________________________________________________ Name: svn:eol-style + native Index: src/test/org/apache/lucene/index/TestIndexWriterDelete.java =================================================================== --- src/test/org/apache/lucene/index/TestIndexWriterDelete.java (revision 566309) +++ src/test/org/apache/lucene/index/TestIndexWriterDelete.java (working copy) @@ -32,9 +32,17 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.store.MockRAMDirectory; import org.apache.lucene.store.RAMDirectory; +import java.util.List; public class TestIndexWriterDelete extends TestCase { + class KeepAllDeletionPolicy implements IndexDeletionPolicy { + public void onInit(List commits) { + } + public void onCommit(List commits) { + } + } + // test the simple case public void testSimpleCase() throws IOException { String[] keywords = { "1", "2" }; @@ -49,7 +57,7 @@ Directory dir = new RAMDirectory(); IndexWriter modifier = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true); - modifier.setUseCompoundFile(true); + ((LogDocMergePolicy)modifier.getMergePolicy()).setUseCompoundFile(true); modifier.setMaxBufferedDeleteTerms(1); for (int i = 0; i < keywords.length; i++) { @@ -76,7 +84,7 @@ assertEquals(1, hitCount); if (!autoCommit) { modifier = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer()); - modifier.setUseCompoundFile(true); + 
((LogDocMergePolicy)modifier.getMergePolicy()).setUseCompoundFile(true); } modifier.deleteDocuments(term); if (!autoCommit) { @@ -445,15 +453,18 @@ } // Whether we succeeded or failed, check that all - // un-referenced files were in fact deleted (ie, - // we did not create garbage). Just create a - // new IndexFileDeleter, have it delete - // unreferenced files, then verify that in fact - // no files were deleted: + // un-referenced files were in fact deleted (i.e., we did not + // create garbage). First we write the current segmentInfos + // out to disk. This will make sure the deleter doesn't + // delete files related to the current, unwritten + // state. Then create a new IndexFileDeleter with a keep-all + // policy and have it delete unreferenced files and verify + // that in fact no files were deleted: + modifier.checkpoint(dir); String[] startFiles = dir.list(); SegmentInfos infos = new SegmentInfos(); infos.read(dir); - IndexFileDeleter d = new IndexFileDeleter(dir, new KeepOnlyLastCommitDeletionPolicy(), infos, null, null); + IndexFileDeleter d = new IndexFileDeleter(dir, new KeepAllDeletionPolicy(), infos, null, null); String[] endFiles = dir.list(); Arrays.sort(startFiles); @@ -496,6 +507,7 @@ Hits hits = null; try { hits = searcher.search(new TermQuery(searchTerm)); + // System.err.println("hits after: " + hits.length()); } catch (IOException e) { e.printStackTrace(); @@ -503,10 +515,16 @@ } int result2 = hits.length(); if (success) { - if (result2 != END_COUNT) { + // If we succeeded, then we are in the second pass and have + // the results from the first. The results will depend on + // when the failure occurred in the previous pass. All we + // guarantee is that we will have seen either all of + // the deletes or updates, or none of them.
+ if (result2 != START_COUNT && result2 != END_COUNT) { + // System.err.println("*Q1 " + autoCommit + " " + updates + " **"); fail(testName + ": method did not throw exception but hits.length for search on term 'aaa' is " - + result2 + " instead of expected " + END_COUNT); + + result2 + " instead of expected " + START_COUNT + " or " + END_COUNT); } } else { // On hitting exception we still may have added @@ -579,7 +597,7 @@ MockRAMDirectory dir = new MockRAMDirectory(); IndexWriter modifier = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true); - modifier.setUseCompoundFile(true); + ((LogDocMergePolicy)modifier.getMergePolicy()).setUseCompoundFile(true); modifier.setMaxBufferedDeleteTerms(2); dir.failOn(failure.reset()); @@ -615,7 +633,7 @@ if (!autoCommit) { modifier = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer()); - modifier.setUseCompoundFile(true); + ((LogDocMergePolicy)modifier.getMergePolicy()).setUseCompoundFile(true); } // delete the doc Index: src/test/org/apache/lucene/index/TestFieldsReader.java =================================================================== --- src/test/org/apache/lucene/index/TestFieldsReader.java (revision 566309) +++ src/test/org/apache/lucene/index/TestFieldsReader.java (working copy) @@ -46,7 +46,7 @@ DocHelper.setupDoc(testDoc); fieldInfos.add(testDoc); IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); - writer.setUseCompoundFile(false); + ((LogDocMergePolicy)writer.getMergePolicy()).setUseCompoundFile(false); writer.addDocument(testDoc); writer.close(); } @@ -204,7 +204,7 @@ assertTrue(tmpDir != null); IndexWriter writer = new IndexWriter(tmpDir, new WhitespaceAnalyzer(), true); - writer.setUseCompoundFile(false); + ((LogDocMergePolicy)writer.getMergePolicy()).setUseCompoundFile(false); writer.addDocument(testDoc); writer.close(); Index: src/test/org/apache/lucene/index/TestIndexReader.java =================================================================== --- src/test/org/apache/lucene/index/TestIndexReader.java (revision 566309) +++ src/test/org/apache/lucene/index/TestIndexReader.java (working copy) @@ -96,17 +96,17 @@ // add more documents writer = new IndexWriter(d, new StandardAnalyzer(), false); // want to get some more segments here - for (int i = 0; i < 5*writer.getMergeFactor(); i++) + for (int i = 0; i < 5*((LogDocMergePolicy)writer.getMergePolicy()).getMergeFactor(); i++) { addDocumentWithFields(writer); } // new fields are in some different segments (we hope) - for (int i = 0; i < 5*writer.getMergeFactor(); i++) + for (int i = 0; i < 5*((LogDocMergePolicy)writer.getMergePolicy()).getMergeFactor(); i++) { addDocumentWithDifferentFields(writer); } // new termvector fields - for (int i = 0; i < 5*writer.getMergeFactor(); i++) + for (int i = 0; i < 5*((LogDocMergePolicy)writer.getMergePolicy()).getMergeFactor(); i++) { addDocumentWithTermVectorFields(writer); } @@ -177,7 +177,7 @@ IndexWriter writer = new IndexWriter(d, new StandardAnalyzer(), true); // want to get some more segments here // new termvector fields - for (int i = 0; i < 5 * writer.getMergeFactor(); i++) { + for (int i = 0; i < 5 * ((LogDocMergePolicy)writer.getMergePolicy()).getMergeFactor(); i++) { Document doc = new Document(); doc.add(new Field("tvnot","one two two three three three", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO)); doc.add(new Field("termvector","one two two three three three", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.YES)); @@ -430,7 +430,7 @@ // add 1 documents with term : 
aaa writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); - writer.setUseCompoundFile(false); + ((LogDocMergePolicy)writer.getMergePolicy()).setUseCompoundFile(false); addDoc(writer, searchTerm.text()); writer.close(); Index: src/test/org/apache/lucene/index/TestTermdocPerf.java =================================================================== --- src/test/org/apache/lucene/index/TestTermdocPerf.java (revision 566309) +++ src/test/org/apache/lucene/index/TestTermdocPerf.java (working copy) @@ -67,7 +67,7 @@ doc.add(new Field(field,val, Field.Store.NO, Field.Index.NO_NORMS)); IndexWriter writer = new IndexWriter(dir, analyzer, true); writer.setMaxBufferedDocs(100); - writer.setMergeFactor(100); + ((LogDocMergePolicy)writer.getMergePolicy()).setMergeFactor(100); for (int i=0; iThe default value is {@link Integer#MAX_VALUE}. + */ + public void setMaxMergeDocs(int maxMergeDocs) { + if (maxMergeDocs < 2) + throw new IllegalArgumentException("maxMergeDocs must at least be 2"); + this.maxMergeDocs = maxMergeDocs; + } + + /** See {@link #setMaxMergeDocs(int)} + */ + public int getMaxMergeDocs() { + return maxMergeDocs; + } + + private int mergeFactor = DEFAULT_MERGE_FACTOR; + + /** Determines how often segment indices are merged during addDocument(). With + * smaller values, less RAM is used while indexing, and searches on + * unoptimized indices are faster, but indexing speed is slower. With larger + * values, more RAM is used during indexing, and while searches on unoptimized + * indices are slower, indexing is faster. Thus larger values (> 10) are best + * for batch index creation, and smaller values (< 10) for indices that are + * interactively maintained. + * + *

This must never be less than 2. The default value is 10. + */ + public void setMergeFactor(int mergeFactor) { + if (mergeFactor < 2) + throw new IllegalArgumentException("mergeFactor must at least be 2"); + this.mergeFactor = mergeFactor; + } + + /** + * @see #setMergeFactor + */ + public int getMergeFactor() { + return mergeFactor; + } + + /* a ConsistencyState object wraps the merge invariants: + * + * 1) that the list of segments, viewed from the end, is a + * monotonically increasing in level (the log of the number of + * documents (not including deleted documents)). + * + * 2) All segments are owned by the destination directory. + * + * Inconsistencies are measured in three ways: + * + * lowestConsistentBound: if not zero, it is the lowest logarithmic + * level above which logarithmic monotonicity holds. E.g., (assuming + * minMergeDocs = mergeFactor = 10 + * LCB(100 10 1) == 0 + * LCB(100 1 10) == 100 + * LCB(1 10 100) == 1000 + * + * firstInconsistentSegment: when the invariants aren't met, the + * firstInconsistentSegment is the index of, well, the first + * inconsistent segment, e.g., (same givens as above) + * FIS(100 1 10) = 1 + * + * inconsistentDirectories: a list of directories not contained by + * the merge which need to be copied if not otherwise merged + * + * Notes: + * + * Consistency in this context does not include checking that the number of segments at a level is <= M + * but the merge policy will still consolidate those. + * + */ + + class ConsistencyState { + + Vector inconsistentDirectories = new Vector(); + int lowestConsistentBound = 0; + int firstInconsistentSegment = -1; + + ConsistencyState(SegmentInfos segmentInfos, IndexMerger merger) { + + int previousBound = -1; + int currentBound = merger.getMaxBufferedDocs(); + + for (int i = segmentInfos.size()-1; i >=0; i--) { + + SegmentInfo info = segmentInfos.info(i); + + if (info.dir != merger.getDirectory()) { + inconsistentDirectories.add(0, new Integer(i)); + } + + int docCount = info.docCount; + + if (docCount <= previousBound) { + lowestConsistentBound = currentBound; + firstInconsistentSegment = i; + } + + while (docCount > currentBound) { + + previousBound = currentBound; + currentBound *= mergeFactor; + + if (currentBound > maxMergeDocs) { + throw new IllegalArgumentException("No segment size can exceed maxMergeDocs"); + } + } + + } + + } + + boolean isConsistent() { + return lowestConsistentBound == 0 && inconsistentDirectories.isEmpty(); + } + + } + + int lowestConsistentBound(SegmentInfos segmentInfos, IndexMerger merger) { + return new ConsistencyState(segmentInfos, merger).lowestConsistentBound; + } + + protected int merge(MergeSpecification spec, IndexMerger merger) + throws MergeException, CorruptIndexException, IOException { + int docCount = merger.merge(spec); + return docCount; + } + + SegmentInfos segmentInfos(SegmentInfos all, int first, int last) { + SegmentInfos subset = new SegmentInfos(); + subset.addAll(all.subList(first, last)); + return subset; + } + + static SegmentInfos copy(SegmentInfos segmentInfos) { + SegmentInfos result = new SegmentInfos(); + result.addAll(segmentInfos); + return result; + } + + /* The primary method for handling merges is cascadingMerge. It + * takes a consistent list of segments and generates primitive + * merges until the logarithmic invariants are met. 
+ * + * This function takes two parameters that override the "default" + * behaviour: + * + * firstBound: this parameter effectively overrides minMergeDocs for + * one call, the result being that all segments with fewer than + * firstBound docs will be considered to be at the same level. This + * behaviour is used when making an inconsistent index consistent: + * cascadingMerge is called with firstBound = lowestConsistentBound. + * + * firstSegmentToConsider: this is used to manually set the left + * boundary of the subsequence. It also causes a merge even if the + * number of segments that would be merged is less than + * mergeFactor. It is used as part of the process of making an index + * consistent. + * + * Differences from the non-factored version: + * + * Processing of non-minimum sized segments: this function, rather + * than starting at minimum sized segments and stopping if none are + * found, looks at the lowest level found and checks it, stopping if + * it is okay. This is designed to complement the consistency checker, + * which handles lists where levels are not contiguous but allows + * segments which are overpopulated (too big) yet logarithmically + * monotonic. + * + * Notes on concurrency: if the concurrent merge work goes forward, + * this function needs to be changed structurally. When concurrent merges + * can occur, segmentInfos can change over time, and in ways not + * predictable from the current thread. In particular, during a merge, + * segmentInfos can change a lot, and thus the loop below, which + * expects to do multiple merges based on data gleaned before + * beginning the merges, probably needs to change. + */ + + public void cascadingMerge(SegmentInfos segmentInfos, + IndexMerger merger, + int firstBound, + int firstSegmentToConsider) + throws MergeException, CorruptIndexException, IOException { + + long previousBound = -1; + long currentBound = firstBound; + + assert currentBound != 0; + + /* Determine the bounds for the smallest existing segment */ + + int firstCount = segmentInfos.info(segmentInfos.size()-1).docCount; + while (firstCount > currentBound) { + previousBound = currentBound; + currentBound *= mergeFactor; + } + + /* find the contiguous subsequence of segments on this level, + * searching from the right */ + + while (currentBound <= maxMergeDocs) { + + /* starts like this: */ + /* abcdefg */ + /*r l*/ + /* and iterates by pulling the left pointer towards the left */ + + int left = segmentInfos.size(); + int right = -1; + + /* search from the right ... */ + + while(--left >= 0) { + + int docCount = segmentInfos.info(left).docCount; + + /* first time a segment is in bounds, set right */ + + if (right == -1 && docCount > previousBound && docCount <= currentBound) { + right = left; + } + + /* first time a segment is above bound, stop, leaving left where it is + * else continue, which pulls left towards the left */ + + if (docCount > currentBound) { + break; + } + + } + + int first = left + 1; + + if (firstSegmentToConsider >= 0) { + first = firstSegmentToConsider; + } + + int length = right - first + 1; + + /* intoNextLevel is used to determine how to handle the results + * of a merge. If the merge results in a segment in the next + * level, a cascade will ensue. Otherwise, the merge has created + * a segment at the same level as the previous segments and is a + * candidate for further merging at that level.
+ */ + + boolean intoNextLevel = false; + + MergeSpecification spec = new MergeSpecification(); + spec.useCompoundFile = useCompoundFile; + + while (length > 0) { + + int mergeLength = mergeFactor; + + /* Normally, if the length of the subsequence is less than + * mergeFactor, no merge will occur. This is modified if first + * has been overridden, as described above. + */ + + if (length < mergeLength) { + if (firstSegmentToConsider == -1) { + break; + } + mergeLength = length; + } + + int last = first + mergeLength; + + /* Create the subsequence. Note this creates a new segmentInfos + * (so it's not == to the one owned by the merger). This + * should help in concurrency but is not alone sufficient. + */ + + spec.segments = segmentInfos(segmentInfos, first, last); + + int docCount = 0; + + docCount = merge(spec, merger); + + length -= mergeLength; + + firstSegmentToConsider = -1; + + if (docCount == -1) { + + /* If the merge ends up being concurrent, we don't know the + * number of documents that will result. So we just look at + * the rest of the segments. Note, though, that the bump by + * mergeLength assumes that segmentInfos hasn't been updated + * before this code is run, which is not safe. That's an + * example of where this needs to be tweaked for + * concurrency. Partly this could be handled by going to a + * recursive rather than iterative structure. But there + * are other issues that need to be explored / understood ... + */ + + first += mergeLength; + + } else if (docCount > currentBound) { + + /* If the new segment is into the next level, record that + * fact and continue with other segments at this level. */ + + first++; + intoNextLevel = true; + + } else { + + /* The new segment is still in the current level, so just + * add it back to this list of segments on this level */ + + length++; + + } + + } + + /* At this point, there are fewer than mergeFactor segments left to be + * merged at this level. If at some point we created a segment + * at the next level, we need to check it, so we'll fall through + * the break. Otherwise, the break fires, indicating that no + * further merges are needed to ensure consistency (as long as + * we were already consistent). + */ + if (!intoNextLevel) { + break; + } + + previousBound = currentBound; + currentBound *= mergeFactor; + + } + + } + + public void cascadingMerge(SegmentInfos segmentInfos, + IndexMerger merger, + int firstBound) + throws MergeException, CorruptIndexException, IOException { + cascadingMerge(segmentInfos, merger, firstBound, /* firstSegmentToConsider = */ -1); + } + + public void cascadingMerge(SegmentInfos segmentInfos, IndexMerger merger) + throws MergeException, CorruptIndexException, IOException { + cascadingMerge(segmentInfos, + merger, + /* firstBound = */ merger.getMaxBufferedDocs(), + /* firstSegmentToConsider = */ -1); + } + + public boolean isConsistent(SegmentInfos segmentInfos, + IndexMerger merger) { + return ((new ConsistencyState(segmentInfos, merger)).isConsistent()); + } + + /* Use the merger to copy a sequence of segments */ + void copy(SegmentInfos segmentInfos, + IndexMerger merger, + int first, + int last) + throws MergeException, CorruptIndexException, IOException { + + MergeSpecification spec = new MergeSpecification(); + spec.segments = segmentInfos(segmentInfos, first, last); + spec.useCompoundFile = useCompoundFile; + + merge(spec, merger); + + }
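Before the makeConsistent machinery below, a concrete illustration of the consistency bound it relies on. This is an editorial sketch, not part of the patch: it reuses the MockIndexMerger harness and one of the cases from the new TestLogDocMergePolicy, and assumes a junit.framework.TestCase in the org.apache.lucene.index package with the default maxBufferedDocs = mergeFactor = 10.

  // Editorial sketch: with levels (0,10], (10,100], (100,1000], ... the
  // trailing "100 10 1" re-enters levels the right-to-left scan has already
  // climbed past, so monotonicity only holds above the 100-doc level and
  // lowestConsistentBound reports 100.
  public void testLowestConsistentBoundSketch() {
    LogDocMergePolicy policy = new LogDocMergePolicy();
    MockIndexMerger merger =
        new MockIndexMerger("10000 1000 100 10 1 100 10 1");
    assertEquals(100, policy.lowestConsistentBound(merger.segmentInfos, merger));
  }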
+ /* This is the "alternative" to cascadingMerge. It is used to take + * an inconsistent list of segments and make them consistent. It + * uses cascadingMerge and copy to do this. + * + * This code is based closely on the non-factored version. + * + * Differences from the non-factored version: + * + * In the non-factored version, IndexWriter#addIndexesNoOptimize + * has most of the code for making an index consistent. It's + * necessary in that function since its purpose is to aggregate a + * number of indexes, in which case it's highly unlikely the + * resulting sequence will follow the logarithmic + * invariants. However, there are other reasons an index will not be + * consistent. It may well not be consistent if parameters + * change. Deletes can also cause merges that result in + * inconsistency (I think). + * + * makeConsistent is supposed to take an inconsistent list and make + * it consistent. It does this without regard for what caused the + * inconsistency. Because it makes fewer assumptions, the algorithm + * is slightly different from addIndexesNoOptimize and the results + * can be different in a few cases (though the invariants will still + * hold). + */ + + boolean makeConsistent(SegmentInfos segmentInfos, IndexMerger merger) + throws MergeException, CorruptIndexException, IOException { + + ConsistencyState state = new ConsistencyState(segmentInfos, merger); + + if (state.isConsistent()) { + return false; + } + + /* If the segment list is not composed of contiguous subsequences + * at monotonically increasing levels, use cascadingMerge with + * firstBound == the lowest consistent bound. This will divide + * the list into two pieces: a (possibly empty) consistent set on + * the left and an inconsistent set on the right, e.g., + * + * 1000 1000 10 100 10 + * < con > < incon > + * + * Starting with the left-most inconsistent segment, + * cascadingMerge will merge mergeFactor segments at a time using + * its normal algorithm for the given level. + */ + + if (state.lowestConsistentBound > 0) { + cascadingMerge(segmentInfos, merger, state.lowestConsistentBound); + state = new ConsistencyState(segmentInfos, merger); + } + + /* cascadingMerge will stop when there are fewer than + * mergeFactor segments to merge, but there may still be inconsistency in the last + * segments, of which there may be up to mergeFactor - 1. + * For example, we could be left with + * + * 1000 100 100 10 100 + * + * with M = 4. Since the first inconsistency is only one from the end, + * it will not get merged. + * + * We use cascadingMerge again, but force it to merge these + * segments by specifying the firstInconsistentSegment. + */ + + if (state.lowestConsistentBound > 0) { + cascadingMerge(segmentInfos, + merger, + state.lowestConsistentBound, + state.firstInconsistentSegment); + } + + /* All of these merges will (should?) have left the sequence + * consistent, but there may be further possible merges. Try */ + + cascadingMerge(segmentInfos, merger); + + state = new ConsistencyState(segmentInfos, merger); + + /* Finally, it's possible this merger doesn't even own some of + * these segments. If all of the earlier merges left some external + * segments untouched, copy them in unchanged (except for + * garbage-collected deleted docs). */ + + if (state.inconsistentDirectories.size() > 0) { + + for(int i = 0; i < state.inconsistentDirectories.size(); i++) { + + int segment = ((Integer)(state.inconsistentDirectories.elementAt(i))).intValue(); + copy(segmentInfos, merger, segment, segment+1); + } + + /* The copy could have generated segments that, via deleted doc + * gc, no longer obey the invariants. So just try the whole + * thing again. This might be overkill? Or maybe we should + * actually be doing this in more cases? + */ + + makeConsistent(segmentInfos, merger); + + } + + return true; + + }
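To make the external-directory path above concrete, here is an editorial sketch (again not part of the patch) that restates the testDirs case from the new TestLogDocMergePolicy; it makes the same assumptions as the previous sketch.

  // Editorial sketch: "x10" is a 10-doc segment living in a foreign
  // Directory. merge() (MergePolicyBase.merge -> checkedMerge ->
  // makeConsistent) copies it into the merger's own directory, leaving
  // the doc counts unchanged.
  public void testForeignDirectoryCopySketch()
      throws CorruptIndexException, IOException {
    MockIndexMerger merger = new MockIndexMerger("100 x10 1");
    LogDocMergePolicy policy = new LogDocMergePolicy();
    policy.setMaxMergeDocs(100000); // as in the new tests' set() helper

    policy.merge(merger.segmentInfos, merger);

    assertEquals(new MockIndexMerger("100 10 1").toString(), merger.toString());
  }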
+ protected void checkedMerge(SegmentInfos segmentInfos, + IndexMerger merger) + throws MergeException, CorruptIndexException, IOException { + + assert segmentInfos.size() > 0; + + /* Since any merge can result in inconsistency, we have to run + * makeConsistent afterwards anyway. Would it be better to combine + * the code and put it in a while loop? Would that be too much + * extra computation? Not sure yet ... + */ + + if (!makeConsistent(segmentInfos, merger)) { + cascadingMerge(segmentInfos, merger); + makeConsistent(segmentInfos, merger); + } + + } + + /** + *

The amount of free space required when a merge is + * triggered is up to 1X the size of all segments being + * merged, when no readers/searchers are open against the + * index, and up to 2X the size of all segments being + * merged when readers/searchers are open against the + * index (see {@link #optimize()} for details). Most + * merges are small (merging the smallest segments + * together), but whenever a full merge occurs (all + * segments in the index, which is the worst case for + * temporary space usage) then the maximum free disk space + * required is the same as {@link #optimize}.
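As a worked example of the 1X/2X figures above: fully merging segments totalling 1 GB with no readers open can transiently require roughly 1 GB of additional free space, while the same merge with an IndexReader still holding the old segments open can require roughly 2 GB, since the old and new copies of the merged segments coexist until the reader releases the old files.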

+ */ + + protected void checkedOptimize(SegmentInfos segmentInfos, + IndexMerger merger) + throws MergeException, CorruptIndexException, IOException { + + if (segmentInfos.size() > 0) { + + while (checkOptimize(segmentInfos, merger)|| + (useCompoundFile && + (!SegmentReader.usesCompoundFile(segmentInfos.info(0))))){ + MergeSpecification spec = new MergeSpecification(); + + int first = segmentInfos.size() - mergeFactor; + first = first < 0 ? 0 : first; + int last = segmentInfos.size(); + + spec.segments = segmentInfos(segmentInfos, first, last); + spec.useCompoundFile = useCompoundFile; + + merge(spec, merger); + } + + } + + } + + static public String segString(SegmentInfos segmentInfos, + int first, + int beyond, + IndexMerger merger) { + + StringBuffer buffer = new StringBuffer(); + + for(int i = first; i < beyond; i++) { + + if (i > first) { + buffer.append(' '); + } + + buffer.append('['); + buffer.append(i); + buffer.append(']'); + + SegmentInfo info = segmentInfos.info(i); + + try { + if (info.getUseCompoundFile()) { + buffer.append('c'); + } else { + buffer.append('C'); + } + } catch (Exception e) { + } + + if (info.dir != merger.getDirectory()) { + buffer.append('x'); + } + + buffer.append(info.docCount); + + } + + return buffer.toString(); + + } + + public String segString(SegmentInfos segmentInfos, + IndexMerger merger, + int first, + int beyond) { + + return segString(segmentInfos, merger, first, beyond); + } + + static public String segString(SegmentInfos segmentInfos, + IndexMerger merger) { + return segString(segmentInfos, 0, segmentInfos.size(), merger); + } + + static public String segString(MergeSpecification spec, IndexMerger merger) { + return segString(spec.segments, merger); + } + +} Property changes on: src/java/org/apache/lucene/index/LogDocMergePolicy.java ___________________________________________________________________ Name: svn:eol-style + native Index: src/java/org/apache/lucene/index/MergePolicyBase.java =================================================================== --- src/java/org/apache/lucene/index/MergePolicyBase.java (revision 0) +++ src/java/org/apache/lucene/index/MergePolicyBase.java (revision 0) @@ -0,0 +1,61 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; + +abstract class MergePolicyBase implements MergePolicy { + + protected boolean checkOptimize(SegmentInfos segmentInfos, + IndexMerger merger) throws IOException { + return ((segmentInfos.size() > 1) || + SegmentReader.hasDeletions(segmentInfos.info(0)) || + SegmentReader.hasSeparateNorms(segmentInfos.info(0)) || + (segmentInfos.info(0).dir != merger.getDirectory())); + } + + public void merge(SegmentInfos segmentInfos, + IndexMerger merger) + throws CorruptIndexException, IOException { + + try { + checkedMerge(segmentInfos, merger); + } catch (MergeException me) { + throw new RuntimeException (me); + } + } + + public void optimize(SegmentInfos segmentInfos, + IndexMerger merger) + throws CorruptIndexException, IOException { + try { + checkedOptimize(segmentInfos, merger); + } catch (MergeException me) { + throw new RuntimeException (me); + } + } + + abstract protected void checkedMerge(SegmentInfos segmentInfos, + IndexMerger merger) + throws MergeException, CorruptIndexException, IOException; + + abstract protected void checkedOptimize(SegmentInfos segmentInfos, + IndexMerger merger) + throws MergeException, CorruptIndexException, IOException; + +} Property changes on: src/java/org/apache/lucene/index/MergePolicyBase.java ___________________________________________________________________ Name: svn:eol-style + native Index: src/java/org/apache/lucene/index/IndexModifier.java =================================================================== --- src/java/org/apache/lucene/index/IndexModifier.java (revision 566282) +++ src/java/org/apache/lucene/index/IndexModifier.java (working copy) @@ -101,12 +101,25 @@ // Lucene defaults: protected PrintStream infoStream = null; - protected boolean useCompoundFile = true; + + private MergePolicy mergePolicy = new LogDocMergePolicy(); + + /** + * @deprecated + * @see LogDocMergePolicy#getUseCompoundFile(boolean) + */ + protected boolean _useCompoundFile; + protected int maxBufferedDocs = IndexWriter.DEFAULT_MAX_BUFFERED_DOCS; protected int maxFieldLength = IndexWriter.DEFAULT_MAX_FIELD_LENGTH; - protected int mergeFactor = IndexWriter.DEFAULT_MERGE_FACTOR; /** + * @deprecated + * @see: LogDocMergePolicy.DEFAULT_MERGE_FACTOR + */ + protected int _mergeFactor = LogDocMergePolicy.DEFAULT_MERGE_FACTOR; + + /** * Open an index with write access. * * @param directory the index directory @@ -180,7 +193,7 @@ * Throw an IllegalStateException if the index is closed. 
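Returning to the MergePolicyBase skeleton above: merge() and optimize() are template methods that wrap checkedMerge()/checkedOptimize() and rethrow MergeException as a RuntimeException. As an editorial illustration only (not part of the patch, and assuming the MergePolicy interface requires nothing beyond what this hunk shows), a trivial subclass might look like this:

  package org.apache.lucene.index;

  import java.io.IOException;

  // Hypothetical, for illustration: a policy that never merges anything.
  // A real policy would inspect segmentInfos and call merger.merge(spec)
  // with a MergePolicy.MergeSpecification for each group it selects,
  // as LogDocMergePolicy does.
  class NoOpMergePolicy extends MergePolicyBase {

    protected void checkedMerge(SegmentInfos segmentInfos, IndexMerger merger)
        throws MergeException, CorruptIndexException, IOException {
      // deliberately leave the segments untouched
    }

    protected void checkedOptimize(SegmentInfos segmentInfos, IndexMerger merger)
        throws MergeException, CorruptIndexException, IOException {
      // checkOptimize(segmentInfos, merger) from the base class reports
      // whether optimize() would normally still have work to do
    }
  }

A writer then installs whichever policy is in use (IndexModifier does this through IndexWriter.setMergePolicy in the hunks below), and existing merge settings are reached through the installed policy, e.g. ((LogDocMergePolicy) writer.getMergePolicy()).setMergeFactor(10), which is the pattern the test changes earlier in this patch adopt.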
* @throws IllegalStateException */ - protected void assureOpen() { + protected void ensureOpen() { if (!open) { throw new IllegalStateException("Index is closed"); } @@ -202,11 +215,10 @@ } indexWriter = new IndexWriter(directory, analyzer, false); indexWriter.setInfoStream(infoStream); - indexWriter.setUseCompoundFile(useCompoundFile); if (maxBufferedDocs != 0) indexWriter.setMaxBufferedDocs(maxBufferedDocs); indexWriter.setMaxFieldLength(maxFieldLength); - indexWriter.setMergeFactor(mergeFactor); + indexWriter.setMergePolicy(mergePolicy, /* doClose = */ false); } } @@ -218,6 +230,7 @@ protected void createIndexReader() throws CorruptIndexException, IOException { if (indexReader == null) { if (indexWriter != null) { + indexWriter.flush(); indexWriter.close(); indexWriter = null; } @@ -235,8 +248,9 @@ */ public void flush() throws CorruptIndexException, LockObtainFailedException, IOException { synchronized(directory) { - assureOpen(); + ensureOpen(); if (indexWriter != null) { + indexWriter.flush(); indexWriter.close(); indexWriter = null; createIndexWriter(); @@ -263,7 +277,7 @@ */ public void addDocument(Document doc, Analyzer docAnalyzer) throws CorruptIndexException, LockObtainFailedException, IOException { synchronized(directory) { - assureOpen(); + ensureOpen(); createIndexWriter(); if (docAnalyzer != null) indexWriter.addDocument(doc, docAnalyzer); @@ -307,7 +321,7 @@ */ public int deleteDocuments(Term term) throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException { synchronized(directory) { - assureOpen(); + ensureOpen(); createIndexReader(); return indexReader.deleteDocuments(term); } @@ -326,7 +340,7 @@ */ public void deleteDocument(int docNum) throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException { synchronized(directory) { - assureOpen(); + ensureOpen(); createIndexReader(); indexReader.deleteDocument(docNum); } @@ -341,7 +355,7 @@ */ public int docCount() { synchronized(directory) { - assureOpen(); + ensureOpen(); if (indexWriter != null) { return indexWriter.docCount(); } else { @@ -363,7 +377,7 @@ */ public void optimize() throws CorruptIndexException, LockObtainFailedException, IOException { synchronized(directory) { - assureOpen(); + ensureOpen(); createIndexWriter(); indexWriter.optimize(); } @@ -378,7 +392,7 @@ */ public void setInfoStream(PrintStream infoStream) { synchronized(directory) { - assureOpen(); + ensureOpen(); if (indexWriter != null) { indexWriter.setInfoStream(infoStream); } @@ -396,42 +410,65 @@ */ public PrintStream getInfoStream() throws CorruptIndexException, LockObtainFailedException, IOException { synchronized(directory) { - assureOpen(); + ensureOpen(); createIndexWriter(); return indexWriter.getInfoStream(); } } /** - * Setting to turn on usage of a compound file. When on, multiple files - * for each segment are merged into a single file once the segment creation - * is finished. This is done regardless of what directory is in use. 
- * @see IndexWriter#setUseCompoundFile(boolean) - * @throws IllegalStateException if the index is closed + * Set the merge policy used by this IndexModifier */ - public void setUseCompoundFile(boolean useCompoundFile) { - synchronized(directory) { - assureOpen(); + public void setMergePolicy(MergePolicy mp) throws CorruptIndexException, IOException { + ensureOpen(); + if (mergePolicy != null && mergePolicy != mp) { if (indexWriter != null) { - indexWriter.setUseCompoundFile(useCompoundFile); + indexWriter.flush(); } - this.useCompoundFile = useCompoundFile; + mergePolicy.close(); } + mergePolicy = mp; + if (indexWriter != null) { + indexWriter.setMergePolicy(mergePolicy, /* doClose = */ false); + } } /** - * @see IndexModifier#setUseCompoundFile(boolean) + * @see #setMergePolicy(MergePolicy) + */ + public MergePolicy getMergePolicy() { + return mergePolicy; + } + + + /** + * @deprecated + * @see LogDocMergePolicy#setUseCompoundFile(boolean) + * @throws IllegalStateException if the index is closed + */ + public void _setUseCompoundFile(boolean value) { + try { + ((LogDocMergePolicy) getMergePolicy()).setUseCompoundFile(value); + _useCompoundFile = ((LogDocMergePolicy) getMergePolicy()).getUseCompoundFile(); + } catch (ClassCastException cce) { + throw new IllegalArgumentException("setUseCompoundFile can only be called on LogDocMergePolicy"); + } + } + + /** + * @deprecated + * @see LogDocMergePolicy#getUseCompoundFile(boolean) * @throws CorruptIndexException if the index is corrupt * @throws LockObtainFailedException if another writer * has this index open (write.lock could not * be obtained) * @throws IOException if there is a low-level IO error */ - public boolean getUseCompoundFile() throws CorruptIndexException, LockObtainFailedException, IOException { - synchronized(directory) { - assureOpen(); - createIndexWriter(); - return indexWriter.getUseCompoundFile(); + public boolean _getUseCompoundFile() throws CorruptIndexException, LockObtainFailedException, IOException { + try { + return ((LogDocMergePolicy) getMergePolicy()).getUseCompoundFile(); + } catch (ClassCastException cce) { + throw new IllegalArgumentException("getUseCompoundFile can only be called on LogDocMergePolicy"); } } @@ -451,7 +488,7 @@ */ public void setMaxFieldLength(int maxFieldLength) { synchronized(directory) { - assureOpen(); + ensureOpen(); if (indexWriter != null) { indexWriter.setMaxFieldLength(maxFieldLength); } @@ -469,7 +506,7 @@ */ public int getMaxFieldLength() throws CorruptIndexException, LockObtainFailedException, IOException { synchronized(directory) { - assureOpen(); + ensureOpen(); createIndexWriter(); return indexWriter.getMaxFieldLength(); } @@ -479,8 +516,7 @@ * Determines the minimal number of documents required before the buffered * in-memory documents are merging and a new Segment is created. * Since Documents are merged in a {@link org.apache.lucene.store.RAMDirectory}, - * large value gives faster indexing. At the same time, mergeFactor limits - * the number of files open in a FSDirectory. + * large value gives faster indexing. * *

The default value is 10. * @@ -490,7 +526,7 @@ */ public void setMaxBufferedDocs(int maxBufferedDocs) { synchronized(directory) { - assureOpen(); + ensureOpen(); if (indexWriter != null) { indexWriter.setMaxBufferedDocs(maxBufferedDocs); } @@ -508,36 +544,30 @@ */ public int getMaxBufferedDocs() throws CorruptIndexException, LockObtainFailedException, IOException { synchronized(directory) { - assureOpen(); + ensureOpen(); createIndexWriter(); return indexWriter.getMaxBufferedDocs(); } } /** - * Determines how often segment indices are merged by addDocument(). With - * smaller values, less RAM is used while indexing, and searches on - * unoptimized indices are faster, but indexing speed is slower. With larger - * values, more RAM is used during indexing, and while searches on unoptimized - * indices are slower, indexing is faster. Thus larger values (> 10) are best - * for batch index creation, and smaller values (< 10) for indices that are - * interactively maintained. - *

This must never be less than 2. The default value is 10. - * - * @see IndexWriter#setMergeFactor(int) + * @deprecated + * @see LogDocMergePolicy#setMergeFactor(int) * @throws IllegalStateException if the index is closed */ - public void setMergeFactor(int mergeFactor) { + public void _setMergeFactor(int mergeFactor) { synchronized(directory) { - assureOpen(); - if (indexWriter != null) { - indexWriter.setMergeFactor(mergeFactor); + ensureOpen(); + try { + ((LogDocMergePolicy) getMergePolicy()).setMergeFactor(mergeFactor); + } catch (ClassCastException cce) { + throw new IllegalArgumentException("setMergeFactor can only be called on LogDocMergePolicy"); } - this.mergeFactor = mergeFactor; } } /** + * @deprecated * @see IndexModifier#setMergeFactor(int) * @throws CorruptIndexException if the index is corrupt * @throws LockObtainFailedException if another writer @@ -545,11 +575,14 @@ * be obtained) * @throws IOException if there is a low-level IO error */ - public int getMergeFactor() throws CorruptIndexException, LockObtainFailedException, IOException { + public int _getMergeFactor() throws CorruptIndexException, LockObtainFailedException, IOException { synchronized(directory) { - assureOpen(); - createIndexWriter(); - return indexWriter.getMergeFactor(); + ensureOpen(); + try { + return ((LogDocMergePolicy) getMergePolicy()).getMergeFactor(); + } catch (ClassCastException cce) { + throw new IllegalArgumentException("getMergeFactor can only be called on LogDocMergePolicy"); + } } } @@ -565,12 +598,16 @@ if (!open) throw new IllegalStateException("Index is closed already"); if (indexWriter != null) { + indexWriter.flush(); indexWriter.close(); indexWriter = null; } else { indexReader.close(); indexReader = null; } + if (mergePolicy != null) { + mergePolicy.close(); + } open = false; } } Index: src/java/org/apache/lucene/index/IndexMerger.java =================================================================== --- src/java/org/apache/lucene/index/IndexMerger.java (revision 0) +++ src/java/org/apache/lucene/index/IndexMerger.java (revision 0) @@ -0,0 +1,80 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.store.Directory; + +import java.io.IOException; + +/** + * The set of operations needed by a MergePolicy object + * to trigger individual primitive merge operations. Currently only + * implemented by IndexWriter. 
+ */ + +interface IndexMerger { + + /** + * The primary primitive merge operation + * + * @param m specification of the desired merge + * @return number of documents in the new segment (or -1 if indeterminate) + */ + + int merge(MergePolicy.MergeSpecification m) + throws CorruptIndexException, IOException; + + /** + * Identifies the target directory of a merge so that the merge + * policy can determine if segments need to be copied between + * directories. + * + * @return the directory that new segments will be created within. + */ + Directory getDirectory(); + + /** + * Returns 0 if this writer is flushing by RAM usage, else + * returns the number of buffered added documents that will + * trigger a flush. + * @see #setMaxBufferedDocs + */ + int getMaxBufferedDocs(); + + /** + * Returns 0.0 if this writer is flushing by document + * count, else returns the value set by {@link + * #setRAMBufferSizeMB}. + */ + double getRAMBufferSizeMB(); + + /** + * High-level optimize request; generally will result in one or more + * primitive merge requests via the merge policy. + */ + void optimize() + throws CorruptIndexException, IOException; + + /** + * High-level merge request; generally will result in one or more + * primitive merge requests via the merge policy. + */ + void merge() + throws CorruptIndexException, IOException; + +} Property changes on: src/java/org/apache/lucene/index/IndexMerger.java ___________________________________________________________________ Name: svn:eol-style + native Index: src/java/org/apache/lucene/index/MergePolicy.java =================================================================== --- src/java/org/apache/lucene/index/MergePolicy.java (revision 0) +++ src/java/org/apache/lucene/index/MergePolicy.java (revision 0) @@ -0,0 +1,118 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +/** + * A merge policy determines the sequence of primitive merge + * operations to be used for overall merge and optimize + * operations. Each merge or optimize call on the merge policy results + * in zero or more primitive merge calls on an + * IndexMerger. The merge policy is called by + * IndexWriter with the current list of all segments and + * it, in turn, calls to an IndexMerger (typically the + * writer, again), with individual merge requests, each of which + * generates a single new segment from the contents of a subset of all + * segments. + */ + +/* Notes: a merge policy piggybacks on the synchronization of the + * caller (presumably the merger, e.g., IndexWriter). Non-concurrent + * mergers do little internal synchronization.
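To make the contract of these two interfaces concrete, here is a hypothetical policy (not part of this patch) built on the MergePolicyBase helper introduced above. It lives in org.apache.lucene.index because the interfaces are package-private, never merges incrementally, and simply collapses the whole index into one compound segment when optimize() is requested:

    package org.apache.lucene.index;

    import java.io.IOException;

    /** Hypothetical, illustration only: merge everything on optimize(), nothing otherwise. */
    class MergeAllOnOptimizePolicy extends MergePolicyBase {

      protected void checkedMerge(SegmentInfos infos, IndexMerger merger) {
        // no incremental merging in this sketch
      }

      protected void checkedOptimize(SegmentInfos infos, IndexMerger merger)
          throws MergeException, CorruptIndexException, IOException {
        if (checkOptimize(infos, merger)) {
          MergeSpecification spec = new MergeSpecification();
          spec.segments = (SegmentInfos) infos.clone();  // one merge over every current segment
          spec.useCompoundFile = true;                   // the merged segment uses the compound format
          merger.merge(spec);                            // primitive merge callback into the writer
        }
      }

      public void close() {
        // nothing to release
      }

      public boolean useCompoundFile(SegmentInfos infos, SegmentInfo newSegment) {
        return true;   // newly flushed segments use the compound format
      }

      public boolean useCompoundDocStore(SegmentInfos infos) {
        return true;
      }
    }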
+ */ + +interface MergePolicy { + + /** + * A merge specification provides the information necessary to + * perform an individual primitive merge operation, resulting in a + * single new segment. The merge spec includes the subset of + * segments to be merged as well as various options such as whether + * the new segment should or should not be in compound format. + */ + + public static class MergeSpecification implements Cloneable { + /** + * The subset of segments to be included in the primitive merge. + */ + public SegmentInfos segments; + + /** + * Indicate the format to be used in the resulting segment. + */ + public boolean useCompoundFile; + + /** + * Publicly avaliable clone(). + */ + public MergeSpecification copy() { + try { + return (MergeSpecification)super.clone(); + } catch (CloneNotSupportedException cnse) { + throw new RuntimeException ("clone not supported on a MergeSpecification subclass"); + } + } + } + + /** + * Merge a sequence of segments. The policy determines what set of + * primitive merge operations constitute a high-level merge. + * + * @param segmentInfos the total set of segments in the index + */ + void merge(SegmentInfos segmentInfos, + IndexMerger merger) + throws CorruptIndexException, IOException; + + /** + * Optimize a sequence of segments. The policy determines what set of + * primitive merge operations constitute an optimize. + * + * @param segmentInfos the total set of segments in the index + */ + void optimize(SegmentInfos segmentInfos, + IndexMerger merger) + throws CorruptIndexException, IOException; + + /** + * Release all resources for the policy. + */ + void close(); + + /** + * Returns an indication of whether a new (not from merge) segment + * should be compound or not. + */ + boolean useCompoundFile(SegmentInfos segments, SegmentInfo newSegment); + + /** + * Returns an indication of whether doc store files should be compound + * or not. + */ + boolean useCompoundDocStore(SegmentInfos segments); + + /* MergeException is a placeholder for other exceptions in derived + * classes. SerialMerge policies never throws it so if it catches + * one, so if they catch one, it turns into a runtime error, which it is. + */ + + class MergeException extends Exception { + } + +} Property changes on: src/java/org/apache/lucene/index/MergePolicy.java ___________________________________________________________________ Name: svn:eol-style + native Index: src/java/org/apache/lucene/index/IndexWriter.java =================================================================== --- src/java/org/apache/lucene/index/IndexWriter.java (revision 566282) +++ src/java/org/apache/lucene/index/IndexWriter.java (working copy) @@ -161,7 +161,7 @@ * referenced by the "front" of the index). For this, IndexFileDeleter * keeps track of the last non commit checkpoint. */ -public class IndexWriter { +public class IndexWriter implements IndexMerger { /** * Default value for the write lock timeout (1,000). @@ -177,9 +177,10 @@ public static final String WRITE_LOCK_NAME = "write.lock"; /** - * Default value is 10. Change using {@link #setMergeFactor(int)}. + * @deprecated + * @see: LogDocMergePolicy.DEFAULT_MERGE_FACTOR */ - public final static int DEFAULT_MERGE_FACTOR = 10; + public final static int _DEFAULT_MERGE_FACTOR = LogDocMergePolicy.DEFAULT_MERGE_FACTOR; /** * Default value is 10. Change using {@link #setMaxBufferedDocs(int)}. @@ -205,9 +206,10 @@ public final static int DEFAULT_MAX_BUFFERED_DELETE_TERMS = 1000; /** - * Default value is {@link Integer#MAX_VALUE}. 
Change using {@link #setMaxMergeDocs(int)}. + * @deprecated + * @see: LogDocMergePolicy.DEFAULT_MAX_MERGE_DOCS */ - public final static int DEFAULT_MAX_MERGE_DOCS = Integer.MAX_VALUE; + public final static int _DEFAULT_MAX_MERGE_DOCS = Integer.MAX_VALUE; /** * Default value is 10,000. Change using {@link #setMaxFieldLength(int)}. @@ -239,7 +241,7 @@ private boolean localAutoCommit; // saved autoCommit during local transaction private boolean autoCommit = true; // false if we should commit only on close - SegmentInfos segmentInfos = new SegmentInfos(); // the segments + private SegmentInfos segmentInfos = new SegmentInfos(); // the segments private DocumentsWriter docWriter; private IndexFileDeleter deleter; @@ -255,14 +257,8 @@ // The key is delete term; the value is number of ram // segments the term applies to. private HashMap bufferedDeleteTerms = new HashMap(); - private int numBufferedDeleteTerms = 0; + /* private */ int numBufferedDeleteTerms = 0; - /** Use compound file setting. Defaults to true, minimizing the number of - * files used. Setting this to false may improve indexing performance, but - * may also cause file handle problems. - */ - private boolean useCompoundFile = true; - private boolean closeDir; private boolean closed; @@ -278,23 +274,30 @@ } } - /** Get the current setting of whether to use the compound file format. - * Note that this just returns the value you set with setUseCompoundFile(boolean) - * or the default. You cannot use this to query the status of an existing index. - * @see #setUseCompoundFile(boolean) + /** + * @deprecated + * @see LogDocMergePolicy#getUseCompoundFile() + * */ - public boolean getUseCompoundFile() { - ensureOpen(); - return useCompoundFile; + public boolean _getUseCompoundFile() { + try { + return ((LogDocMergePolicy) getMergePolicy()).getUseCompoundFile(); + } catch (ClassCastException cce) { + throw new IllegalArgumentException("getUseCompoundFile can only be called on LogDocMergePolicy"); + } } - /** Setting to turn on usage of a compound file. When on, multiple files - * for each segment are merged into a single file once the segment creation - * is finished. This is done regardless of what directory is in use. + /** + * @deprecated + * @see LogDocMergePolicy#setUseCompoundFile(boolean) + * */ - public void setUseCompoundFile(boolean value) { - ensureOpen(); - useCompoundFile = value; + public void _setUseCompoundFile(boolean value) { + try { + ((LogDocMergePolicy) getMergePolicy()).setUseCompoundFile(value); + } catch (ClassCastException cce) { + throw new IllegalArgumentException("setUseCompoundFile can only be called on LogDocMergePolicy"); + } } /** Expert: Set the Similarity implementation used by this IndexWriter. @@ -652,29 +655,60 @@ } } - /** Determines the largest number of documents ever merged by addDocument(). - * Small values (e.g., less than 10,000) are best for interactive indexing, - * as this limits the length of pauses while indexing to a few seconds. - * Larger values are best for batched indexing and speedier searches. - * - *

The default value is {@link Integer#MAX_VALUE}. + private MergePolicy mergePolicy = new LogDocMergePolicy(); + private boolean doMergeClose; + + /** + * Set the merge policy used by this IndexWriter */ - public void setMaxMergeDocs(int maxMergeDocs) { + public void setMergePolicy(MergePolicy mp, boolean doClose) { ensureOpen(); - this.maxMergeDocs = maxMergeDocs; + if (mergePolicy != null && mergePolicy != mp && doMergeClose) { + mergePolicy.close(); + } + mergePolicy = mp; + doMergeClose = doClose; } + public void setMergePolicy(MergePolicy mp) { + setMergePolicy(mp, true); + } + /** * Returns the largest number of documents allowed in a * single segment. * @see #setMaxMergeDocs */ - public int getMaxMergeDocs() { + public MergePolicy getMergePolicy() { ensureOpen(); - return maxMergeDocs; + return mergePolicy; } /** + * @deprecated + * @see MergePolicy#setMaxMergeDocs(int) + */ + public void _setMaxMergeDocs(int maxMergeDocs) { + try { + ((LogDocMergePolicy) getMergePolicy()).setMaxMergeDocs(maxMergeDocs); + } catch (ClassCastException cce) { + throw new IllegalArgumentException("setMaxMergeDocs can only be called on LogDocMergePolicy"); + } + } + + /** + * @deprecated + * @see MergePolicy#getMaxMergeDocs() + */ + public int _getMaxMergeDocs() { + try { + return ((LogDocMergePolicy) getMergePolicy()).getMaxMergeDocs(); + } catch (ClassCastException cce) { + throw new IllegalArgumentException("getMaxMergeDocs can only be called on LogDocMergePolicy"); + } + } + + /** * The maximum number of terms that will be indexed for a single field in a * document. This limits the amount of memory required for indexing, so that * collections with very large files will not crash the indexing process by @@ -717,6 +751,7 @@ * @throws IllegalArgumentException if maxBufferedDocs is * smaller than 2 * @see #setRAMBufferSizeMB + * */ public void setMaxBufferedDocs(int maxBufferedDocs) { ensureOpen(); @@ -788,32 +823,28 @@ return maxBufferedDeleteTerms; } - /** Determines how often segment indices are merged by addDocument(). With - * smaller values, less RAM is used while indexing, and searches on - * unoptimized indices are faster, but indexing speed is slower. With larger - * values, more RAM is used during indexing, and while searches on unoptimized - * indices are slower, indexing is faster. Thus larger values (> 10) are best - * for batch index creation, and smaller values (< 10) for indices that are - * interactively maintained. - * - *

This must never be less than 2. The default value is 10. + /** + * @deprecated + * @see LogDocMergePolicy#setMergeFactor(int) */ - public void setMergeFactor(int mergeFactor) { - ensureOpen(); - if (mergeFactor < 2) - throw new IllegalArgumentException("mergeFactor cannot be less than 2"); - this.mergeFactor = mergeFactor; + public void _setMergeFactor(int mergeFactor) { + try { + ((LogDocMergePolicy) getMergePolicy()).setMergeFactor(mergeFactor); + } catch (ClassCastException cce) { + throw new IllegalArgumentException("setMergeFactor can only be called on LogDocMergePolicy"); + } } /** - * Returns the number of segments that are merged at once - * and also controls the total number of segments allowed - * to accumulate in the index. - * @see #setMergeFactor + * @deprecated + * @see LogDocMergePolicy#getMergeFactor */ - public int getMergeFactor() { - ensureOpen(); - return mergeFactor; + public int _getMergeFactor() { + try { + return ((LogDocMergePolicy) getMergePolicy()).getMergeFactor(); + } catch (ClassCastException cce) { + throw new IllegalArgumentException("getMergeFactor can only be called on LogDocMergePolicy"); + } } /** If non-null, this will be the default infoStream used @@ -839,7 +870,6 @@ */ public void setInfoStream(PrintStream infoStream) { ensureOpen(); - this.infoStream = infoStream; docWriter.setInfoStream(infoStream); deleter.setInfoStream(infoStream); } @@ -935,6 +965,13 @@ rollbackSegmentInfos = null; } + if (mergePolicy != null && doMergeClose) { + if (doMergeClose){ + mergePolicy.close(); + } + mergePolicy = null; + } + if (writeLock != null) { writeLock.release(); // release write lock writeLock = null; @@ -948,11 +985,15 @@ } /** Tells the docWriter to close its currently open shared - * doc stores (stored fields & vectors files). */ - private void flushDocStores() throws IOException { + * doc stores (stored fields & vectors files). + * Return value specifies whether new doc store files are compound or not. + */ + private boolean flushDocStores() throws IOException { List files = docWriter.files(); + boolean useCompoundDocStore = false; + if (files.size() > 0) { String docStoreSegment; @@ -965,7 +1006,9 @@ docWriter.abort(); } - if (useCompoundFile && docStoreSegment != null) { + useCompoundDocStore = mergePolicy.useCompoundDocStore(segmentInfos); + + if (useCompoundDocStore && docStoreSegment != null) { // Now build compound doc store file checkpoint(); @@ -1006,6 +1049,8 @@ deleter.checkpoint(segmentInfos, false); } } + + return useCompoundDocStore; } /** Release the write lock, if needed. */ @@ -1073,23 +1118,19 @@ *

This method periodically flushes pending documents * to the Directory (every {@link #setMaxBufferedDocs}), * and also periodically merges segments in the index - * (every {@link #setMergeFactor} flushes). When this + * according to the {@link MergePolicy} in use. When this * occurs, the method will take more time to run (possibly * a long time if the index is large), and will require * free temporary space in the Directory to do the * merging.
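For callers, the practical effect of these IndexWriter changes is that merge tuning moves off the writer and onto its policy object. A short sketch of the replacement pattern, assuming the default LogDocMergePolicy is installed; "directory" and the analyzer are placeholders for whatever the application already uses:

    IndexWriter writer = new IndexWriter(directory, new SimpleAnalyzer(), true);

    LogDocMergePolicy policy = (LogDocMergePolicy) writer.getMergePolicy();
    policy.setMergeFactor(20);         // replaces writer.setMergeFactor(20)
    policy.setMaxMergeDocs(100000);    // replaces writer.setMaxMergeDocs(100000)
    policy.setUseCompoundFile(false);  // replaces writer.setUseCompoundFile(false)

    // A different policy can also be installed outright; with doClose=true the
    // writer closes the policy when the writer itself is closed.
    // writer.setMergePolicy(myPolicy, true);   // myPolicy is any MergePolicy implementation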

* - *

The amount of free space required when a merge is - * triggered is up to 1X the size of all segments being - * merged, when no readers/searchers are open against the - * index, and up to 2X the size of all segments being - * merged when readers/searchers are open against the - * index (see {@link #optimize()} for details). Most - * merges are small (merging the smallest segments - * together), but whenever a full merge occurs (all - * segments in the index, which is the worst case for - * temporary space usage) then the maximum free disk space - * required is the same as {@link #optimize}.

+ *

The amount of free space required when a merge is triggered is + * up to 1X the size of all segments being merged, when no + * readers/searchers are open against the index, and up to 2X the + * size of all segments being merged when readers/searchers are open + * against the index (see {@link #optimize()} for details). The + * sequence of primitive merge operations performed is governed by + * the merge policy. * * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error @@ -1117,6 +1158,8 @@ try { success = docWriter.addDocument(doc, analyzer); } catch (IOException ioe) { + bufferedDeleteTerms.clear(); + numBufferedDeleteTerms = 0; deleter.refresh(); throw ioe; } @@ -1192,6 +1235,8 @@ try { success = docWriter.addDocument(doc, analyzer); } catch (IOException ioe) { + bufferedDeleteTerms.clear(); + numBufferedDeleteTerms = 0; deleter.refresh(); throw ioe; } @@ -1224,51 +1269,33 @@ return "_" + Integer.toString(segmentInfos.counter++, Character.MAX_RADIX); } - /** Determines how often segment indices are merged by addDocument(). With - * smaller values, less RAM is used while indexing, and searches on - * unoptimized indices are faster, but indexing speed is slower. With larger - * values, more RAM is used during indexing, and while searches on unoptimized - * indices are slower, indexing is faster. Thus larger values (> 10) are best - * for batch index creation, and smaller values (< 10) for indices that are - * interactively maintained. - * - *

This must never be less than 2. The default value is {@link #DEFAULT_MERGE_FACTOR}. - - */ - private int mergeFactor = DEFAULT_MERGE_FACTOR; - /** Determines amount of RAM usage by the buffered docs at * which point we trigger a flush to the index. */ private double ramBufferSize = DEFAULT_RAM_BUFFER_SIZE_MB*1024F*1024F; - /** Determines the largest number of documents ever merged by addDocument(). - * Small values (e.g., less than 10,000) are best for interactive indexing, - * as this limits the length of pauses while indexing to a few seconds. - * Larger values are best for batched indexing and speedier searches. - * - *

The default value is {@link #DEFAULT_MAX_MERGE_DOCS}. - - */ - private int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS; - /** If non-null, information about merges will be printed to this. */ private PrintStream infoStream = null; - private static PrintStream defaultInfoStream = null; - /** Merges all segments together into a single segment, - * optimizing an index for search. + /** + * Requests an "optimize" operation on an index, priming the index + * for the fastest available search. Traditionally this has meant + * merging all segments into a single segment as is done in the + * default merge policy, but individual merge policies may implement + * optimize in different ways. + * + * @see LogDocMergePolicy#optimize(SegmentInfos) + * + *
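Since optimize() now just delegates to the policy, what an optimize pass does is entirely policy-defined. A hypothetical example using the MergeAllOnOptimizePolicy sketched earlier (package-private, so this would live alongside it):

    writer.setMergePolicy(new MergeAllOnOptimizePolicy());  // hypothetical policy; closed by the writer on close()
    writer.optimize();   // the policy chooses the primitive merges that make up the optimize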

It is recommended that this method be called upon completion of indexing. In * environments with frequent updates, optimize is best done during low volume times, if at all. * *

*

See http://www.gossamer-threads.com/lists/lucene/java-dev/47895 for more discussion.

* - *

Note that this requires substantial temporary free + *

Note that this can require substantial temporary free * space in the Directory (see LUCENE-764 * for details):

@@ -1306,7 +1333,7 @@ *

The actual temporary usage could be much less than * these figures (it depends on many factors).

* - *

Once the optimize completes, the total size of the + *

In general, once the optimize completes, the total size of the * index will be less than the size of the starting index. * It could be quite a bit smaller (if there were many * pending deletes) or just slightly smaller.
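The next hunk replaces the hand-rolled optimize loop with a call into the merge policy and adds a public merge() method; per its javadoc, an explicit call mainly matters after policy parameters change. A hedged sketch, again assuming the default LogDocMergePolicy:

    LogDocMergePolicy policy = (LogDocMergePolicy) writer.getMergePolicy();
    policy.setMergeFactor(5);   // tighten the factor on an already-built index
    writer.merge();             // one explicit pass so the policy can re-level existing segments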

@@ -1326,18 +1353,21 @@ public synchronized void optimize() throws CorruptIndexException, IOException { ensureOpen(); flush(); - while (segmentInfos.size() > 1 || - (segmentInfos.size() == 1 && - (SegmentReader.hasDeletions(segmentInfos.info(0)) || - SegmentReader.hasSeparateNorms(segmentInfos.info(0)) || - segmentInfos.info(0).dir != directory || - (useCompoundFile && - !segmentInfos.info(0).getUseCompoundFile())))) { - int minSegment = segmentInfos.size() - mergeFactor; - mergeSegments(minSegment < 0 ? 0 : minSegment, segmentInfos.size()); - } + mergePolicy.optimize(segmentInfos, this); } + /** + * Requests a merge operation on an index. In memory data is first + * flushed to disk and then the merge policy called. + * + * Explict calls to merge() are usually not necessary. The most + * common case when they would be used are when merge policy + * parameters change or when concurrent merges are used. + */ + public synchronized void merge() throws CorruptIndexException, IOException { + flush(); + } + /* * Begin a transaction. During a transaction, any segment * merges that happen (or ram segments flushed) will not @@ -1351,7 +1381,7 @@ * within the transactions, so they must be flushed before the * transaction is started. */ - private void startTransaction() throws IOException { + void startTransaction() throws IOException { assert numBufferedDeleteTerms == 0 : "calling startTransaction with buffered delete terms not supported"; @@ -1374,7 +1404,7 @@ * Rolls back the transaction and restores state to where * we were at the start. */ - private void rollbackTransaction() throws IOException { + void rollbackTransaction() throws IOException { // First restore autoCommit in case we hit an exception below: autoCommit = localAutoCommit; @@ -1403,7 +1433,7 @@ * segments file and remove and pending deletions we have * accumulated during the transaction */ - private void commitTransaction() throws IOException { + void commitTransaction() throws IOException { // First restore autoCommit in case we hit an exception below: autoCommit = localAutoCommit; @@ -1451,6 +1481,10 @@ segmentInfos.clear(); segmentInfos.addAll(rollbackSegmentInfos); + // discard any buffered delete terms so they aren't applied in close() + bufferedDeleteTerms.clear(); + numBufferedDeleteTerms = 0; + docWriter.abort(); // Ask deleter to locate unreferenced files & remove @@ -1458,9 +1492,6 @@ deleter.checkpoint(segmentInfos, false); deleter.refresh(); - bufferedDeleteTerms.clear(); - numBufferedDeleteTerms = 0; - commitPending = false; docWriter.abort(); close(); @@ -1537,7 +1568,7 @@ throws CorruptIndexException, IOException { ensureOpen(); - optimize(); // start with zero or 1 seg + flush(); int start = segmentInfos.size(); @@ -1554,15 +1585,8 @@ } } - // merge newly added segments in log(n) passes - while (segmentInfos.size() > start+mergeFactor) { - for (int base = start; base < segmentInfos.size(); base++) { - int end = Math.min(segmentInfos.size(), base+mergeFactor); - if (end-base > 1) { - mergeSegments(base, end); - } - } - } + mergePolicy.optimize(segmentInfos, this); + success = true; } finally { if (success) { @@ -1571,8 +1595,7 @@ rollbackTransaction(); } } - - optimize(); // final cleanup + } /** @@ -1594,40 +1617,10 @@ */ public synchronized void addIndexesNoOptimize(Directory[] dirs) throws CorruptIndexException, IOException { - // Adding indexes can be viewed as adding a sequence of segments S to - // a sequence of segments T. 
Segments in T follow the invariants but - // segments in S may not since they could come from multiple indexes. - // Here is the merge algorithm for addIndexesNoOptimize(): - // - // 1 Flush ram. - // 2 Consider a combined sequence with segments from T followed - // by segments from S (same as current addIndexes(Directory[])). - // 3 Assume the highest level for segments in S is h. Call - // maybeMergeSegments(), but instead of starting w/ lowerBound = -1 - // and upperBound = maxBufferedDocs, start w/ lowerBound = -1 and - // upperBound = upperBound of level h. After this, the invariants - // are guaranteed except for the last < M segments whose levels <= h. - // 4 If the invariants hold for the last < M segments whose levels <= h, - // if some of those < M segments are from S (not merged in step 3), - // properly copy them over*, otherwise done. - // Otherwise, simply merge those segments. If the merge results in - // a segment of level <= h, done. Otherwise, it's of level h+1 and call - // maybeMergeSegments() starting w/ upperBound = upperBound of level h+1. - // - // * Ideally, we want to simply copy a segment. However, directory does - // not support copy yet. In addition, source may use compound file or not - // and target may use compound file or not. So we use mergeSegments() to - // copy a segment, which may cause doc count to change because deleted - // docs are garbage collected. - // 1 flush ram - ensureOpen(); flush(); - // 2 copy segment infos and find the highest level from dirs - int startUpperBound = docWriter.getMaxBufferedDocs(); - /* new merge policy if (startUpperBound == 0) startUpperBound = 10; @@ -1650,64 +1643,13 @@ for (int j = 0; j < sis.size(); j++) { SegmentInfo info = sis.info(j); segmentInfos.addElement(info); // add each info - - while (startUpperBound < info.docCount) { - startUpperBound *= mergeFactor; // find the highest level from dirs - if (startUpperBound > maxMergeDocs) { - // upper bound cannot exceed maxMergeDocs - throw new IllegalArgumentException("Upper bound cannot exceed maxMergeDocs"); - } - } } } - // 3 maybe merge segments starting from the highest level from dirs - maybeMergeSegments(startUpperBound); + mergePolicy.merge(segmentInfos, this); - // get the tail segments whose levels <= h - int segmentCount = segmentInfos.size(); - int numTailSegments = 0; - while (numTailSegments < segmentCount - && startUpperBound >= segmentInfos.info(segmentCount - 1 - numTailSegments).docCount) { - numTailSegments++; - } - if (numTailSegments == 0) { - success = true; - return; - } - - // 4 make sure invariants hold for the tail segments whose levels <= h - if (checkNonDecreasingLevels(segmentCount - numTailSegments)) { - // identify the segments from S to be copied (not merged in 3) - int numSegmentsToCopy = 0; - while (numSegmentsToCopy < segmentCount - && directory != segmentInfos.info(segmentCount - 1 - numSegmentsToCopy).dir) { - numSegmentsToCopy++; - } - if (numSegmentsToCopy == 0) { - success = true; - return; - } - - // copy those segments from S - for (int i = segmentCount - numSegmentsToCopy; i < segmentCount; i++) { - mergeSegments(i, i + 1); - } - if (checkNonDecreasingLevels(segmentCount - numSegmentsToCopy)) { - success = true; - return; - } - } - - // invariants do not hold, simply merge those segments - mergeSegments(segmentCount - numTailSegments, segmentCount); - - // maybe merge segments again if necessary - if (segmentInfos.info(segmentInfos.size() - 1).docCount > startUpperBound) { - maybeMergeSegments(startUpperBound * mergeFactor); 
- } - success = true; + } finally { if (success) { commitTransaction(); @@ -1717,14 +1659,9 @@ } } - /** Merges the provided indexes into this index. - *

After this completes, the index is optimized.

+ /** + *

See {@link #addIndexes(Directory[])} *

The provided IndexReaders are not closed.

- - *

See {@link #addIndexes(Directory[])} for - * details on transactional semantics, temporary free - * space required in the Directory, and non-CFS segments - * on an Exception.

* @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ @@ -1732,108 +1669,16 @@ throws CorruptIndexException, IOException { ensureOpen(); - optimize(); // start with zero or 1 seg - final String mergedName = newSegmentName(); - SegmentMerger merger = new SegmentMerger(this, mergedName); - - SegmentInfo info; - - IndexReader sReader = null; - try { - if (segmentInfos.size() == 1){ // add existing index, if any - sReader = SegmentReader.get(segmentInfos.info(0)); - merger.add(sReader); - } - - for (int i = 0; i < readers.length; i++) // add new indexes - merger.add(readers[i]); - - boolean success = false; - - startTransaction(); - - try { - int docCount = merger.merge(); // merge 'em - - if(sReader != null) { - sReader.close(); - sReader = null; - } - - segmentInfos.setSize(0); // pop old infos & add new - info = new SegmentInfo(mergedName, docCount, directory, false, true, - -1, null, false); - segmentInfos.addElement(info); - - success = true; - - } finally { - if (!success) { - rollbackTransaction(); - } else { - commitTransaction(); - } - } - } finally { - if (sReader != null) { - sReader.close(); - } + Directory[] dirs = new Directory[ readers.length ]; + for(int i = 0; i < dirs.length; i++) { + dirs[i] = readers[i].directory(); } - - if (useCompoundFile) { - boolean success = false; + addIndexes(dirs); - startTransaction(); - - try { - merger.createCompoundFile(mergedName + ".cfs"); - info.setUseCompoundFile(true); - } finally { - if (!success) { - rollbackTransaction(); - } else { - commitTransaction(); - } - } - } } - // Overview of merge policy: - // - // A flush is triggered either by close() or by the number of ram segments - // reaching maxBufferedDocs. After a disk segment is created by the flush, - // further merges may be triggered. - // - // LowerBound and upperBound set the limits on the doc count of a segment - // which may be merged. Initially, lowerBound is set to 0 and upperBound - // to maxBufferedDocs. Starting from the rightmost* segment whose doc count - // > lowerBound and <= upperBound, count the number of consecutive segments - // whose doc count <= upperBound. - // - // Case 1: number of worthy segments < mergeFactor, no merge, done. - // Case 2: number of worthy segments == mergeFactor, merge these segments. - // If the doc count of the merged segment <= upperBound, done. - // Otherwise, set lowerBound to upperBound, and multiply upperBound - // by mergeFactor, go through the process again. - // Case 3: number of worthy segments > mergeFactor (in the case mergeFactor - // M changes), merge the leftmost* M segments. If the doc count of - // the merged segment <= upperBound, consider the merged segment for - // further merges on this same level. Merge the now leftmost* M - // segments, and so on, until number of worthy segments < mergeFactor. - // If the doc count of all the merged segments <= upperBound, done. - // Otherwise, set lowerBound to upperBound, and multiply upperBound - // by mergeFactor, go through the process again. - // Note that case 2 can be considerd as a special case of case 3. - // - // This merge policy guarantees two invariants if M does not change and - // segment doc count is not reaching maxMergeDocs: - // B for maxBufferedDocs, f(n) defined as ceil(log_M(ceil(n/B))) - // 1: If i (left*) and i+1 (right*) are two consecutive segments of doc - // counts x and y, then f(x) >= f(y). - // 2: The number of committed segments on the same level (f(n)) <= M. 
- // This is called after pending added and deleted // documents have been flushed to the Directory but before // the change is committed (new segments_N file written). @@ -1906,11 +1751,12 @@ // apply to more than just the last flushed segment boolean flushDeletes = bufferedDeleteTerms.size() > 0; - if (infoStream != null) + if (true && infoStream != null) infoStream.println(" flush: flushDocs=" + flushDocs + " flushDeletes=" + flushDeletes + " flushDocStores=" + flushDocStores + - " numDocs=" + numDocs); + " numDocs=" + numDocs + + " numBufDelTerms=" + numBufferedDeleteTerms); int docStoreOffset = docWriter.getDocStoreOffset(); boolean docStoreIsCompoundFile = false; @@ -1923,9 +1769,8 @@ if (infoStream != null) infoStream.println(" flush shared docStore segment " + docStoreSegment); - flushDocStores(); + docStoreIsCompoundFile = flushDocStores(); flushDocStores = false; - docStoreIsCompoundFile = useCompoundFile; } String segment = docWriter.getSegment(); @@ -1988,6 +1833,9 @@ segmentInfos.clear(); segmentInfos.addAll(rollback); + // nocommit + // System.err.println("restore buf deletes " + saveNumBufferedDeleteTerms + " " + + // numBufferedDeleteTerms); if (saveBufferedDeleteTerms != null) { numBufferedDeleteTerms = saveNumBufferedDeleteTerms; bufferedDeleteTerms = saveBufferedDeleteTerms; @@ -2009,7 +1857,8 @@ deleter.checkpoint(segmentInfos, autoCommit); - if (flushDocs && useCompoundFile) { + if (flushDocs && mergePolicy.useCompoundFile(segmentInfos, + newSegment)) { success = false; try { docWriter.createCompoundFile(segment); @@ -2026,14 +1875,15 @@ deleter.checkpoint(segmentInfos, autoCommit); } - /* new merge policy - if (0 == docWriter.getMaxBufferedDocs()) - maybeMergeSegments(mergeFactor * numDocs / 2); - else - maybeMergeSegments(docWriter.getMaxBufferedDocs()); - */ - if (triggerMerge) - maybeMergeSegments(docWriter.getMaxBufferedDocs()); + if (triggerMerge) { + /* new merge policy + if (0 == docWriter.getMaxBufferedDocs()) + mergePolicy.merge(segmentInfos,mergeFactor * numDocs / 2); + else + mergePolicy.merge(segmentInfos,docWriter.getMaxBufferedDocs()); + */ + mergePolicy.merge(segmentInfos, this); + } } } finally { docWriter.clearFlushPending(); @@ -2057,79 +1907,48 @@ return docWriter.getNumDocsInRAM(); } - /** Incremental segment merger. 
*/ - private final void maybeMergeSegments(int startUpperBound) throws CorruptIndexException, IOException { - long lowerBound = -1; - long upperBound = startUpperBound; + /* FIXME if we want to support non-contiguous segment merges */ + synchronized void replace(MergePolicy.MergeSpecification spec, SegmentInfo info) { - /* new merge policy - if (upperBound == 0) upperBound = 10; - */ + int first = segmentInfos.indexOf(spec.segments.info(0)); + int last = segmentInfos.indexOf(spec.segments.info(spec.segments.size() - 1)); - while (upperBound < maxMergeDocs) { - int minSegment = segmentInfos.size(); - int maxSegment = -1; + last++; - // find merge-worthy segments - while (--minSegment >= 0) { - SegmentInfo si = segmentInfos.info(minSegment); + if (!(first >= 0) || !(last >= 1) || !(last - first == spec.segments.size())) { + throw new RuntimeException("bad replace spec"); + } - if (maxSegment == -1 && si.docCount > lowerBound && si.docCount <= upperBound) { - // start from the rightmost* segment whose doc count is in bounds - maxSegment = minSegment; - } else if (si.docCount > upperBound) { - // until the segment whose doc count exceeds upperBound - break; - } - } + // nocommit + // System.err.println("before ++"); + for(int i=0; i < segmentInfos.size(); i++) { + // System.err.println(segmentInfos.info(i).name); + } - minSegment++; - maxSegment++; - int numSegments = maxSegment - minSegment; + segmentInfos.subList(first, last).clear(); + segmentInfos.add(first, info); - if (numSegments < mergeFactor) { - break; - } else { - boolean exceedsUpperLimit = false; - - // number of merge-worthy segments may exceed mergeFactor when - // mergeFactor and/or maxBufferedDocs change(s) - while (numSegments >= mergeFactor) { - // merge the leftmost* mergeFactor segments - - int docCount = mergeSegments(minSegment, minSegment + mergeFactor); - numSegments -= mergeFactor; - - if (docCount > upperBound) { - // continue to merge the rest of the worthy segments on this level - minSegment++; - exceedsUpperLimit = true; - } else { - // if the merged segment does not exceed upperBound, consider - // this segment for further merges on this same level - numSegments++; - } - } - - if (!exceedsUpperLimit) { - // if none of the merged segments exceed upperBound, done - break; - } - } - - lowerBound = upperBound; - upperBound *= mergeFactor; + // nocommit + // System.err.println("after ++"); + for(int i=0; i < segmentInfos.size(); i++) { + // System.err.println(segmentInfos.info(i).name); } + } /** - * Merges the named range of segments, replacing them in the stack with a + * Merges the indicated segments, replacing them in the stack with a * single segment. 
*/ - private final int mergeSegments(int minSegment, int end) + public final int merge(MergePolicy.MergeSpecification spec) throws CorruptIndexException, IOException { + SegmentInfos sourceSegments = spec.segments; + + int minSegment = 0; + int end = sourceSegments.size(); + final String mergedName = newSegmentName(); SegmentMerger merger = null; @@ -2156,7 +1975,7 @@ // Test each segment to be merged for (int i = minSegment; i < end; i++) { - SegmentInfo si = segmentInfos.info(i); + SegmentInfo si = sourceSegments.info(i); // If it has deletions we must merge the doc stores if (si.hasDeletions()) @@ -2207,7 +2026,7 @@ docStoreSegment = null; docStoreIsCompoundFile = false; } else { - SegmentInfo si = segmentInfos.info(minSegment); + SegmentInfo si = sourceSegments.info(minSegment); docStoreOffset = si.getDocStoreOffset(); docStoreSegment = si.getDocStoreSegment(); docStoreIsCompoundFile = si.getDocStoreIsCompoundFile(); @@ -2223,7 +2042,7 @@ merger = new SegmentMerger(this, mergedName); for (int i = minSegment; i < end; i++) { - SegmentInfo si = segmentInfos.info(i); + SegmentInfo si = sourceSegments.info(i); if (infoStream != null) infoStream.print(" " + si.name + " (" + si.docCount + " docs)"); IndexReader reader = SegmentReader.get(si, MERGE_READ_BUFFER_SIZE, mergeDocStores); // no need to set deleter (yet) @@ -2249,13 +2068,10 @@ docStoreSegment, docStoreIsCompoundFile); - rollback = (SegmentInfos) segmentInfos.clone(); + rollback = (SegmentInfos) sourceSegments.clone(); - for (int i = end-1; i > minSegment; i--) // remove old infos & add new - segmentInfos.remove(i); + replace(spec, newSegment); - segmentInfos.set(minSegment, newSegment); - checkpoint(); success = true; @@ -2267,8 +2083,8 @@ // instances, but keep original SegmentInfos // instance (so we don't try to write again the // same segments_N file -- write once): - segmentInfos.clear(); - segmentInfos.addAll(rollback); + sourceSegments.clear(); + sourceSegments.addAll(rollback); } // Delete any partially created and now unreferenced files: @@ -2285,6 +2101,8 @@ // Give deleter a chance to remove files now. deleter.checkpoint(segmentInfos, autoCommit); + boolean useCompoundFile = spec.useCompoundFile; + if (useCompoundFile) { boolean success = false; @@ -2311,6 +2129,7 @@ return mergedDocCount; } + // Called during flush to apply any buffered deletes. If // flushedNewSegment is true then a new segment was just // created and flushed from the ram segments, so we will @@ -2333,6 +2152,7 @@ // Apply delete terms to the segment just flushed from ram // apply appropriately so that a delete term is only applied to // the documents buffered before it, not those buffered after it. + // System.err.println("apply deletes to new"); // nocommit applyDeletesSelectively(bufferedDeleteTerms, reader); } finally { if (reader != null) { @@ -2357,6 +2177,7 @@ // Apply delete terms to disk segments // except the one just flushed from ram. + // System.err.println("apply deletes to " + i); // nocommit applyDeletes(bufferedDeleteTerms, reader); } finally { if (reader != null) { @@ -2370,7 +2191,6 @@ } // Clean up bufferedDeleteTerms. - // Rollbacks of buffered deletes are based on restoring the old // map, so don't modify this one. 
Rare enough that the gc // overhead is almost certainly lower than the alternate, which @@ -2381,29 +2201,6 @@ } } - private final boolean checkNonDecreasingLevels(int start) { - int lowerBound = -1; - int upperBound = docWriter.getMaxBufferedDocs(); - - /* new merge policy - if (upperBound == 0) - upperBound = 10; - */ - - for (int i = segmentInfos.size() - 1; i >= start; i--) { - int docCount = segmentInfos.info(i).docCount; - if (docCount <= lowerBound) { - return false; - } - - while (docCount > upperBound) { - lowerBound = upperBound; - upperBound *= mergeFactor; - } - } - return true; - } - // For test purposes. final synchronized int getBufferedDeleteTermsSize() { return bufferedDeleteTerms.size(); @@ -2438,6 +2235,7 @@ private void bufferDeleteTerm(Term term) { Num num = (Num) bufferedDeleteTerms.get(term); int numDoc = docWriter.getNumDocsInRAM(); + // System.err.println("buf delete " + term); // nocommit if (num == null) { bufferedDeleteTerms.put(term, new Num(numDoc)); } else { @@ -2480,7 +2278,47 @@ Iterator iter = deleteTerms.entrySet().iterator(); while (iter.hasNext()) { Entry entry = (Entry) iter.next(); + // System.err.println("apply " + entry.getKey()); // nocommit reader.deleteDocuments((Term) entry.getKey()); } } + + // utitliy routines for tests + SegmentInfo newestSegment() { + return segmentInfos.info(segmentInfos.size()-1); + } + + void checkpoint(Directory dir) throws IOException { + segmentInfos.write(dir); + } + + public String segString() { + StringBuffer buffer = new StringBuffer(); + for(int i = 0; i < segmentInfos.size(); i++) { + if (i > 0) { + buffer.append(' '); + } + + SegmentInfo info = segmentInfos.info(i); + + try { + if (info.getUseCompoundFile()) { + buffer.append('c'); + } else { + buffer.append('C'); + } + } catch (Exception e) { + } + + if (info.dir != getDirectory()) { + buffer.append('x'); + } + + buffer.append(info.docCount); + + } + + return buffer.toString(); + } + } Index: src/java/org/apache/lucene/index/IndexFileDeleter.java =================================================================== --- src/java/org/apache/lucene/index/IndexFileDeleter.java (revision 566282) +++ src/java/org/apache/lucene/index/IndexFileDeleter.java (working copy) @@ -122,6 +122,7 @@ throws CorruptIndexException, IOException { this.docWriter = docWriter; + infoStream = null; // nocommit this.infoStream = infoStream; if (infoStream != null) Index: contrib/wordnet/src/java/org/apache/lucene/wordnet/Syns2Index.java =================================================================== --- contrib/wordnet/src/java/org/apache/lucene/wordnet/Syns2Index.java (revision 566282) +++ contrib/wordnet/src/java/org/apache/lucene/wordnet/Syns2Index.java (working copy) @@ -35,6 +35,7 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.LogDocMergePolicy; /** * Convert the prolog file wn_s.pl from the WordNet prolog download @@ -243,9 +244,9 @@ // override the specific index if it already exists IndexWriter writer = new IndexWriter(indexDir, ana, true); - writer.setUseCompoundFile(true); // why? + ((LogDocMergePolicy)writer.getMergePolicy()).setUseCompoundFile(true); // why? 
// blindly up these parameters for speed - writer.setMergeFactor( writer.getMergeFactor() * 2); + ((LogDocMergePolicy)writer.getMergePolicy()).setMergeFactor(((LogDocMergePolicy)writer.getMergePolicy()).getMergeFactor() * 2); writer.setMaxBufferedDocs( writer.getMaxBufferedDocs() * 2); Iterator i1 = word2Nums.keySet().iterator(); while (i1.hasNext()) // for each word Index: contrib/ant/src/java/org/apache/lucene/ant/IndexTask.java =================================================================== --- contrib/ant/src/java/org/apache/lucene/ant/IndexTask.java (revision 566282) +++ contrib/ant/src/java/org/apache/lucene/ant/IndexTask.java (working copy) @@ -27,6 +27,7 @@ import org.apache.lucene.document.DateTools; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.Term; +import org.apache.lucene.index.LogDocMergePolicy; import org.apache.lucene.search.Hits; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Searcher; @@ -261,12 +262,12 @@ IndexWriter writer = new IndexWriter(indexDir, analyzer, create); - writer.setUseCompoundFile(useCompoundIndex); + ((LogDocMergePolicy)writer.getMergePolicy()).setUseCompoundFile(useCompoundIndex); int totalFiles = 0; int totalIndexed = 0; int totalIgnored = 0; try { - writer.setMergeFactor(mergeFactor); + ((LogDocMergePolicy)writer.getMergePolicy()).setMergeFactor(mergeFactor); for (int i = 0; i < filesets.size(); i++) { FileSet fs = (FileSet) filesets.get(i); Index: contrib/gdata-server/src/core/src/test/org/apache/lucene/gdata/search/index/TestGdataIndexWriter.java =================================================================== --- contrib/gdata-server/src/core/src/test/org/apache/lucene/gdata/search/index/TestGdataIndexWriter.java (revision 566282) +++ contrib/gdata-server/src/core/src/test/org/apache/lucene/gdata/search/index/TestGdataIndexWriter.java (working copy) @@ -26,6 +26,7 @@ import org.apache.lucene.gdata.search.config.IndexSchema; import org.apache.lucene.gdata.search.config.IndexSchemaField; import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.index.LogDocMergePolicy; /** * @@ -72,10 +73,10 @@ // assertEquals(VALUE_GT_DEFAULT_LONG,writer.getCommitLockTimeout()); assertEquals(VALUE_GT_DEFAULT_LONG,writer.getWriteLockTimeout()); assertEquals(VALUE_GT_DEFAULT_INT,writer.getMaxBufferedDocs()); - assertEquals(VALUE_GT_DEFAULT_INT,writer.getMaxMergeDocs()); + assertEquals(VALUE_GT_DEFAULT_INT,((LogDocMergePolicy)writer.getMergePolicy()).getMaxMergeDocs()); assertEquals(VALUE_GT_DEFAULT_INT,writer.getMaxFieldLength()); - assertEquals(VALUE_GT_DEFAULT_INT,writer.getMergeFactor()); - assertTrue(writer.getUseCompoundFile()); + assertEquals(VALUE_GT_DEFAULT_INT,((LogDocMergePolicy)writer.getMergePolicy()).getMergeFactor()); + assertTrue(((LogDocMergePolicy)writer.getMergePolicy()).getUseCompoundFile()); } } Index: contrib/gdata-server/src/core/src/java/org/apache/lucene/gdata/search/index/GDataIndexWriter.java =================================================================== --- contrib/gdata-server/src/core/src/java/org/apache/lucene/gdata/search/index/GDataIndexWriter.java (revision 566282) +++ contrib/gdata-server/src/core/src/java/org/apache/lucene/gdata/search/index/GDataIndexWriter.java (working copy) @@ -24,6 +24,7 @@ import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.gdata.search.config.IndexSchema; import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.LogDocMergePolicy; import org.apache.lucene.store.Directory; 
/** @@ -39,13 +40,13 @@ private void initialize(IndexSchema config) { this.serviceName = config.getName(); - setUseCompoundFile(config.isUseCompoundFile()); + ((LogDocMergePolicy)getMergePolicy()).setUseCompoundFile(config.isUseCompoundFile()); if (config.getMaxBufferedDocs() != IndexSchema.NOT_SET_VALUE) setMaxBufferedDocs(config.getMaxBufferedDocs()); if (config.getMaxMergeDocs() != IndexSchema.NOT_SET_VALUE) - setMaxMergeDocs(config.getMaxMergeDocs()); + ((LogDocMergePolicy)getMergePolicy()).setMaxMergeDocs(config.getMaxMergeDocs()); if (config.getMergeFactor() != IndexSchema.NOT_SET_VALUE) - setMergeFactor(config.getMergeFactor()); + ((LogDocMergePolicy)getMergePolicy()).setMergeFactor(config.getMergeFactor()); if (config.getMaxFieldLength() != IndexSchema.NOT_SET_VALUE) setMaxFieldLength(config.getMaxFieldLength()); if (config.getWriteLockTimeout() != IndexSchema.NOT_SET_VALUE) Index: contrib/benchmark/conf/indexEnwiki.alg =================================================================== --- contrib/benchmark/conf/indexEnwiki.alg (revision 0) +++ contrib/benchmark/conf/indexEnwiki.alg (revision 0) @@ -0,0 +1,30 @@ + analyzer=org.apache.lucene.analysis.SimpleAnalyzer + + directory=FSDirectory + + ram.flush.mb=64 + + max.field.length=2147483647 + + compound=false + max.buffered=70000 + + doc.add.log.step=5000 + + docs.file=work/enwiki.txt + + doc.maker=org.apache.lucene.benchmark.byTask.feeds.LineDocMaker + + doc.tokenized=true + doc.maker.forever=false + + ResetSystemErase + CreateIndex + { "All" + {AddDoc}: * + } + + CloseIndex + + RepSumByPref All + RepSumByPref AddDoc Index: contrib/benchmark/src/java/org/apache/lucene/benchmark/standard/StandardBenchmarker.java =================================================================== --- contrib/benchmark/src/java/org/apache/lucene/benchmark/standard/StandardBenchmarker.java (revision 566282) +++ contrib/benchmark/src/java/org/apache/lucene/benchmark/standard/StandardBenchmarker.java (working copy) @@ -14,6 +14,7 @@ import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.LogDocMergePolicy; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.search.Hits; import org.apache.lucene.search.IndexSearcher; @@ -134,10 +135,10 @@ trd.startRun(); trd.setId(String.valueOf(i)); IndexWriter iw = new IndexWriter(params.getDirectory(), params.getAnalyzer(), true); - iw.setMergeFactor(params.getMergeFactor()); + ((LogDocMergePolicy)iw.getMergePolicy()).setMergeFactor(params.getMergeFactor()); iw.setMaxBufferedDocs(params.getMaxBufferedDocs()); - iw.setUseCompoundFile(params.isCompound()); + ((LogDocMergePolicy)iw.getMergePolicy()).setUseCompoundFile(params.isCompound()); makeIndex(trd, params.getSource(), iw, true, true, false, options); if (params.isOptimize()) { Index: contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OpenIndexTask.java =================================================================== --- contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OpenIndexTask.java (revision 566282) +++ contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OpenIndexTask.java (working copy) @@ -22,6 +22,7 @@ import org.apache.lucene.benchmark.byTask.utils.Config; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.store.Directory; +import org.apache.lucene.index.LogDocMergePolicy; import java.io.IOException; @@ -62,8 +63,8 @@ // must update params for newly opened 
writer writer.setMaxBufferedDocs(mxbf); writer.setMaxFieldLength(mxfl); - writer.setMergeFactor(mrgf); - writer.setUseCompoundFile(cmpnd); // this one redundant? + ((LogDocMergePolicy)writer.getMergePolicy()).setMergeFactor(mrgf); + ((LogDocMergePolicy)writer.getMergePolicy()).setUseCompoundFile(cmpnd); // this one redundant? if (flushAtRAMUsage > 0) writer.setRAMBufferSizeMB(flushAtRAMUsage); Index: contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java =================================================================== --- contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java (revision 566282) +++ contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java (working copy) @@ -22,6 +22,7 @@ import org.apache.lucene.benchmark.byTask.utils.Config; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.store.Directory; +import org.apache.lucene.index.LogDocMergePolicy; import java.io.IOException; @@ -54,8 +55,8 @@ IndexWriter iw = new IndexWriter(dir, autoCommit, analyzer, true); - iw.setUseCompoundFile(cmpnd); - iw.setMergeFactor(mrgf); + ((LogDocMergePolicy)iw.getMergePolicy()).setUseCompoundFile(cmpnd); + ((LogDocMergePolicy)iw.getMergePolicy()).setMergeFactor(mrgf); iw.setMaxBufferedDocs(mxbf); iw.setMaxFieldLength(mxfl); if (flushAtRAMUsage > 0) Index: contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java =================================================================== --- contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java (revision 566282) +++ contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java (working copy) @@ -26,6 +26,7 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.Term; +import org.apache.lucene.index.LogDocMergePolicy; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.Hits; @@ -311,7 +312,7 @@ IndexReader.unlock(spellIndex); IndexWriter writer = new IndexWriter(spellIndex, new WhitespaceAnalyzer(), !IndexReader.indexExists(spellIndex)); - writer.setMergeFactor(300); + ((LogDocMergePolicy)writer.getMergePolicy()).setMergeFactor(300); writer.setMaxBufferedDocs(150); Iterator iter = dict.getWordsIterator();