Index: src/test/org/apache/lucene/index/TestIndexWriterMergePolicy.java =================================================================== --- src/test/org/apache/lucene/index/TestIndexWriterMergePolicy.java (revision 567805) +++ src/test/org/apache/lucene/index/TestIndexWriterMergePolicy.java (working copy) @@ -198,30 +198,31 @@ int ramSegmentCount = writer.getNumBufferedDocuments(); assertTrue(ramSegmentCount < maxBufferedDocs); - int lowerBound = -1; - int upperBound = maxBufferedDocs; + int previousBound = -1; + int currentBound = maxBufferedDocs; int numSegments = 0; int segmentCount = writer.getSegmentCount(); for (int i = segmentCount - 1; i >= 0; i--) { int docCount = writer.getDocCount(i); - assertTrue(docCount > lowerBound); - if (docCount <= upperBound) { + assertTrue(docCount > previousBound); + + if (docCount <= currentBound) { numSegments++; } else { - if (upperBound * mergeFactor <= maxMergeDocs) { + if (currentBound * mergeFactor <= maxMergeDocs) { assertTrue(numSegments < mergeFactor); } do { - lowerBound = upperBound; - upperBound *= mergeFactor; - } while (docCount > upperBound); + previousBound = currentBound; + currentBound *= mergeFactor; + } while (docCount > currentBound); numSegments = 1; } } - if (upperBound * mergeFactor <= maxMergeDocs) { + if (currentBound * mergeFactor <= maxMergeDocs) { assertTrue(numSegments < mergeFactor); } Index: src/test/org/apache/lucene/index/DocHelper.java =================================================================== --- src/test/org/apache/lucene/index/DocHelper.java (revision 567805) +++ src/test/org/apache/lucene/index/DocHelper.java (working copy) @@ -236,7 +236,7 @@ //writer.setUseCompoundFile(false); writer.addDocument(doc); writer.flush(); - SegmentInfo info = writer.segmentInfos.info(writer.segmentInfos.size()-1); + SegmentInfo info = writer.newestSegment(); writer.close(); return info; } Index: src/test/org/apache/lucene/index/TestDoc.java =================================================================== --- src/test/org/apache/lucene/index/TestDoc.java (revision 567805) +++ src/test/org/apache/lucene/index/TestDoc.java (working copy) @@ -168,7 +168,7 @@ Document doc = FileDocument.Document(file); writer.addDocument(doc); writer.flush(); - return writer.segmentInfos.info(writer.segmentInfos.size()-1); + return writer.newestSegment(); } Index: src/test/org/apache/lucene/index/TestLogDocMergePolicy.java =================================================================== --- src/test/org/apache/lucene/index/TestLogDocMergePolicy.java (revision 0) +++ src/test/org/apache/lucene/index/TestLogDocMergePolicy.java (revision 0) @@ -0,0 +1,214 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; + +import junit.framework.TestCase; + +public class TestLogDocMergePolicy extends TestCase { + + void assertNotEquals(Object a, Object b) { + assertFalse(a.equals(b)); + } + + MockIndexMerger merger; + LogDocMergePolicy policy; + + void set(String sizes) { + merger = new MockIndexMerger (sizes); + policy = new LogDocMergePolicy(); + policy.setMaxMergeDocs(100000); + } + + void merge(String before, String after, int mergeFactor, int minMergeDocs) + throws CorruptIndexException, IOException { + set(before); + merger.setMaxBufferedDocs(minMergeDocs); + policy.setMergeFactor(mergeFactor); + policy.merge(merger.segmentInfos, merger); + MockIndexMerger other = new MockIndexMerger(after); + assertEquals(other.toString(), merger.toString()); + } + + void merge(String before, String after, int mergeFactor) + throws CorruptIndexException, IOException { + merge(before, after, mergeFactor, MockIndexMerger.DEFAULT_MAX_BUFFERED_DOCS); + + } + + void merge(String before, String after) + throws CorruptIndexException, IOException { + merge(before, + after, + LogDocMergePolicy.DEFAULT_MERGE_FACTOR, + MockIndexMerger.DEFAULT_MAX_BUFFERED_DOCS); + } + + public void testOneMerge() + throws CorruptIndexException, IOException { + merge("1000 10 0 10", "1000 10 10", 4, 4); + } + + public void testConsistentMerge() + throws CorruptIndexException, IOException { + + MockIndexMerger other; + + merge("0 10", "0 10"); + merge("1000 10 0 10", "1000 10 10", 4, 4); + merge("1000 0 0 10 0 0 10", "1000 10 0 10", 4); + merge("1000 100 10 1 1 1 1 1 1 1 1 1 1 1 1 1 1", "1000 100 19 1 1 1 1 1"); + merge("1000 100 10 0 0 0 0 0 0 0 0 0 0 0 0 0 0", "1000 100 10 0 0 0 0 0"); + merge("9 9 9 9 9 9 9 9 9 9","90"); + merge("10 10 10 10 10 10 10 10 10 10", "100"); + merge("100 100 100 100 100 100 100 100 100 100", "1000"); + merge("1000 10 x10 x10 x10", "1000 10 10 10 10"); + merge("1000 9 x9 x9 x9", "1000 9 9 9 9"); + merge("1000 10 x10 x10 x10", "1000 40", 4); + merge("1000 9 x9 x9 x9", "1000 36", 4); + merge("1000 0 10 0 10", "1000 20", 4); + + } + + public void testInconsistentMerge() + throws CorruptIndexException, IOException { + + merge("1001 101 11 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1001", "1120 1008"); + merge("1001 101 11 1 1 1 1 1 1 1 1 1 1 1 1 1 1 11", "1001 101 20 16"); + + } + + public void testChangeMergeFactor() + throws CorruptIndexException, IOException { + + MockIndexMerger other; + + set("11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11"); + policy.setMergeFactor(5); + policy.merge(merger.segmentInfos, merger); + other = new MockIndexMerger("275 11"); + assertEquals(other.toString(), merger.toString()); + + set("0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 11"); + policy.setMergeFactor(5); + policy.merge(merger.segmentInfos, merger); + other = new MockIndexMerger("11"); + assertEquals(other.toString(), merger.toString()); + + set("0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 10"); + policy.setMergeFactor(5); + policy.merge(merger.segmentInfos, merger); + other = new MockIndexMerger("0 10"); + assertEquals(other.toString(), merger.toString()); + } + + public void testOptimize() + throws CorruptIndexException, IOException { + + MockIndexMerger other; + + set ("1000 100 10 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1000"); + + other = new MockIndexMerger("2124"); + policy.optimize(merger.segmentInfos, merger); + assertEquals(other.toString(), merger.toString()); + + set ("x1000"); + + other = new MockIndexMerger("1000"); + policy.optimize(merger.segmentInfos, merger); 
+ assertEquals(other.toString(), merger.toString()); + + set ("1000 100 10 1"); + + other = new MockIndexMerger("1111"); + policy.optimize(merger.segmentInfos, merger); + assertEquals(other.toString(), merger.toString()); + + } + + public void testDirs() + throws CorruptIndexException, IOException { + + MockIndexMerger other; + + set ("100 10 1"); + + other = new MockIndexMerger("100 10 1"); + assertEquals(other.toString(), merger.toString()); + + other = new MockIndexMerger("100 x10 1"); + assertNotEquals(other.toString(), merger.toString()); + + set ("100 x10 1"); + policy.merge(merger.segmentInfos, merger); + + other = new MockIndexMerger("100 10 1"); + assertEquals(other.toString(), merger.toString()); + + } + + public void testInconsistent() { + + set("1001 1000 1000 101 100 11 1"); + assertTrue(policy.isConsistent(merger.segmentInfos, merger)); + + set("1001 1000 1001 1000 101 11 1"); + assertFalse(policy.isConsistent(merger.segmentInfos, merger)); + + set("1001 1000 1001 101 1000 101 11 1"); + assertFalse(policy.isConsistent(merger.segmentInfos, merger)); + + set("1001 11 101 1"); + assertFalse(policy.isConsistent(merger.segmentInfos, merger)); + + set("1001 101 11"); + assertTrue(policy.isConsistent(merger.segmentInfos, merger)); + + set("100001 10001 1001 101 11"); + try { + policy.isConsistent(merger.segmentInfos, merger); + fail(); + } catch (IllegalArgumentException e) { + } + + } + + public void testLowestConsistentBound() { + + set("101 101 1001 101 1"); + assertTrue(policy.lowestConsistentBound(merger.segmentInfos, merger) == + 10000); + + set("10000 1000 100 10 1 100 10 1"); + // System.err.println("? " + policy.lowestConsistentBound(merger.segmentInfos)); + assertTrue(policy.lowestConsistentBound(merger.segmentInfos, merger) == + 100); + + set("1000 1000 100 10 1 1000"); + assertTrue(policy.lowestConsistentBound(merger.segmentInfos, merger) == + 1000); + + set("1000 100 10 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1000"); + assertTrue(policy.lowestConsistentBound(merger.segmentInfos, merger) == + 1000); + + } + +} Property changes on: src/test/org/apache/lucene/index/TestLogDocMergePolicy.java ___________________________________________________________________ Name: svn:eol-style + native Index: src/test/org/apache/lucene/index/MockIndexMerger.java =================================================================== --- src/test/org/apache/lucene/index/MockIndexMerger.java (revision 0) +++ src/test/org/apache/lucene/index/MockIndexMerger.java (revision 0) @@ -0,0 +1,278 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; + +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.MockRAMDirectory; + +import java.util.Map; +import java.util.HashMap; +import java.io.IOException; + +class MockIndexMerger implements IndexMerger { + + MergePolicy policy; + + public final static int DEFAULT_MAX_BUFFERED_DOCS = 10; + + private int maxBufferedDocs = DEFAULT_MAX_BUFFERED_DOCS; + + public void setMaxBufferedDocs(int maxBufferedDocs) { + this.maxBufferedDocs = maxBufferedDocs; + } + + public int getMaxBufferedDocs() { + return maxBufferedDocs; + } + + private double ramBufferSizeMB = 0; + + public void setRAMBufferSizeMB(double ramBufferSizeMB) { + this.ramBufferSizeMB = ramBufferSizeMB; + } + + public double getRAMBufferSizeMB() { + return ramBufferSizeMB; + } + + void setMergePolicy(MergePolicy policy) { + this.policy = policy; + } + + public synchronized void optimize() + throws CorruptIndexException, IOException { + policy.optimize(segmentInfos, this); + } + + public synchronized void merge() + throws CorruptIndexException, IOException { + policy.merge(segmentInfos, this); + } + + static int segNumber = 0; + + MockIndexMerger(String string) { + + String[] strings = string.split("\\s"); + + segmentInfos = new SegmentInfos(); + + for(int i = 0; i < strings.length; i++) { + + StringBuffer buffer = new StringBuffer(strings[i]); + + boolean isCompoundFile = true; + boolean hasSingleNormsFile = true; + String dir = ""; + + while (buffer.charAt(0) < '0' || buffer.charAt(0) > '9') { + + char c = buffer.charAt(0); + buffer.deleteCharAt(0); + + switch(c) { + case 'c': isCompoundFile = true; break; + case 'C': isCompoundFile = false; break; + case 'x': dir = "x"; break; + case 'y': dir = "y"; break; + case 'z': dir = "z"; break; + } + + } + + int size = Integer.parseInt(buffer.toString()); + + SegmentInfo info = new SegmentInfo("name" + segNumber++, + size, + the(dir)); + info.setUseCompoundFile(isCompoundFile); + segmentInfos.addElement(info); + + } + } + + public void add(MockIndexMerger other) { + add(copy(other.segmentInfos)); + } + + protected void merge(SegmentInfo segment) { + } + + void add(SegmentInfos segments) { + synchronized(segmentInfos) { + segmentInfos.addAll(segments); + } + } + + static SegmentInfos copy(SegmentInfos segmentInfos) { + SegmentInfos segments = new SegmentInfos(); + synchronized(segmentInfos) { + segments.addAll(segmentInfos); + } + return segments; + } + + // segmentInfos.indexOf(segments.info(i)); + + int find( SegmentInfos segmentInfos, SegmentInfo segment ) { + for(int i=0; i < segmentInfos.size(); i++) { + System.err.println( segmentInfos.info(i).name + " " + segment.name ); + if (segmentInfos.info(i).name.equals( segment.name)) { + return i; + } + } + return -1; + } + + void replace(MergePolicy.MergeSpecification spec, SegmentInfo info) { + synchronized(segmentInfos) { + + SegmentInfos segments = spec.segments; + + for(int i = 0; i < segments.size(); i++) { + + int index = find(segmentInfos, segments.info(i)); + + if (index < 0) { + throw new RuntimeException("could not replace segmentInfo"); + } + + if (i == 0) { + segmentInfos.set(index, info); + } else { + segmentInfos.removeElementAt(index); + } + + } + + } + } + + public int merge(MergePolicy.MergeSpecification spec) + throws CorruptIndexException, IOException { + + SegmentInfo info = new SegmentInfo("name" + segNumber++, 0, directory); + info.setUseCompoundFile(spec.useCompoundFile); + + SegmentInfos segments = copy(spec.segments); + + int docCount = 0; + + for(int i = 0; i < 
segments.size(); i++) { + merge(segments.info(i)); + docCount += segments.info(i).docCount; + } + + info.docCount = docCount; + + replace(spec, info); + + return docCount; + } + + public Directory getDirectory() { + return directory; + } + + SegmentInfos segmentInfos; + Directory directory = the(); + + public String segString(SegmentInfos segments, + int first, + int beyond) { + + segments = copy(segments); + + StringBuffer buffer = new StringBuffer(); + + for(int i = first; i < beyond; i++) { + + if (i > first) { + buffer.append(' '); + } + + buffer.append('['); + buffer.append(i); + buffer.append(']'); + + SegmentInfo info = segments.info(i); + + try { + if (info.getUseCompoundFile()) { + buffer.append('c'); + } else { + buffer.append('C'); + } + } catch (Exception e) { + } + + if (info.dir != getDirectory()) { + buffer.append('x'); + } + + buffer.append(info.docCount); + + } + + return buffer.toString(); + + } + + public String segString(SegmentInfos segments) { + synchronized(segments) { + return segString(segments, 0, segments.size()); + } + } + + public String segString(MergePolicy.MergeSpecification spec) { + return segString(spec.segments); + } + + public String toString() { + return segString(segmentInfos); + } + + // utilities for managing a bunch of dirs + + static Map instances = new HashMap(); + + String name; + + static MockRAMDirectory the(String name) { + try { + if (!instances.containsKey(name)) { + instances.put(name, new MockRAMDirectory()); + } + return (MockRAMDirectory)instances.get(name); + } catch ( IOException ioe ) { + return null; + } + } + + static MockRAMDirectory the() { + return the(""); + } + + public String[] _list() { + return new String[0]; + } + +} + Property changes on: src/test/org/apache/lucene/index/MockIndexMerger.java ___________________________________________________________________ Name: svn:eol-style + native Index: src/test/org/apache/lucene/index/TestMockIndexMerger.java =================================================================== --- src/test/org/apache/lucene/index/TestMockIndexMerger.java (revision 0) +++ src/test/org/apache/lucene/index/TestMockIndexMerger.java (revision 0) @@ -0,0 +1,89 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; + +import junit.framework.TestCase; + +public class TestMockIndexMerger extends TestCase { + + void assertNotEquals(Object a, Object b) { + assertFalse(a.equals(b)); + } + + public void testMockIndexMerger() { + + assertEquals(new MockIndexMerger("1001 101 11 1").toString(), + new MockIndexMerger("1001 101 11 1").toString()); + + assertNotEquals(new MockIndexMerger("1001 101 11 1").toString(), + new MockIndexMerger("1001 101 11 11").toString()); + + } + + SegmentInfos segmentInfos(SegmentInfos all, int first, int last) { + SegmentInfos subset = new SegmentInfos(); + subset.addAll(all.subList(first, last)); + return subset; + } + + public void testUseCompoundFile() throws IOException { + + MockIndexMerger merger = new MockIndexMerger("c1001 C101"); + + assertTrue(merger.segmentInfos.info(0).getUseCompoundFile()); + assertFalse(merger.segmentInfos.info(1).getUseCompoundFile()); + + MergePolicy.MergeSpecification spec = new MergePolicy.MergeSpecification(); + spec.segments = segmentInfos(merger.segmentInfos, 0, 1); + spec.useCompoundFile = true; + + merger.merge(spec); + + assertTrue(merger.segmentInfos.info(0).getUseCompoundFile()); + assertFalse(merger.segmentInfos.info(1).getUseCompoundFile()); + + spec.segments = segmentInfos(merger.segmentInfos, 0, 1); + spec.useCompoundFile = false; + + merger.merge(spec); + + assertFalse(merger.segmentInfos.info(0).getUseCompoundFile()); + assertFalse(merger.segmentInfos.info(1).getUseCompoundFile()); + + spec.segments = segmentInfos(merger.segmentInfos, 1, 2); + spec.useCompoundFile = false; + + merger.merge(spec); + + assertFalse(merger.segmentInfos.info(0).getUseCompoundFile()); + assertFalse(merger.segmentInfos.info(1).getUseCompoundFile()); + + spec.segments = segmentInfos(merger.segmentInfos, 1, 2); + spec.useCompoundFile = true; + + merger.merge(spec); + + assertFalse(merger.segmentInfos.info(0).getUseCompoundFile()); + assertTrue(merger.segmentInfos.info(1).getUseCompoundFile()); + + + } + +} Property changes on: src/test/org/apache/lucene/index/TestMockIndexMerger.java ___________________________________________________________________ Name: svn:eol-style + native Index: src/test/org/apache/lucene/index/TestIndexWriterDelete.java =================================================================== --- src/test/org/apache/lucene/index/TestIndexWriterDelete.java (revision 567805) +++ src/test/org/apache/lucene/index/TestIndexWriterDelete.java (working copy) @@ -32,9 +32,17 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.store.MockRAMDirectory; import org.apache.lucene.store.RAMDirectory; +import java.util.List; public class TestIndexWriterDelete extends TestCase { + class KeepAllDeletionPolicy implements IndexDeletionPolicy { + public void onInit(List commits) { + } + public void onCommit(List commits) { + } + } + // test the simple case public void testSimpleCase() throws IOException { String[] keywords = { "1", "2" }; @@ -445,15 +453,18 @@ } // Whether we succeeded or failed, check that all - // un-referenced files were in fact deleted (ie, - // we did not create garbage). Just create a - // new IndexFileDeleter, have it delete - // unreferenced files, then verify that in fact - // no files were deleted: + // un-referenced files were in fact deleted (ie, we did not + // create garbage). First we write the current segmentInfos + // out to disk. This will make sure the deleter doesn't + // delete files related to the current, unwritten + // state. 
Then create a new IndexFileDeleter with a keep all + // policy and have it delete unreferenced files and verify + // that in fact no files were deleted: + modifier.checkpoint(dir); String[] startFiles = dir.list(); SegmentInfos infos = new SegmentInfos(); infos.read(dir); - IndexFileDeleter d = new IndexFileDeleter(dir, new KeepOnlyLastCommitDeletionPolicy(), infos, null, null); + IndexFileDeleter d = new IndexFileDeleter(dir, new KeepAllDeletionPolicy(), infos, null, null); String[] endFiles = dir.list(); Arrays.sort(startFiles); @@ -496,6 +507,7 @@ Hits hits = null; try { hits = searcher.search(new TermQuery(searchTerm)); + // System.err.println( "hits after: " + hits.length() ); } catch (IOException e) { e.printStackTrace(); @@ -503,10 +515,16 @@ } int result2 = hits.length(); if (success) { - if (result2 != END_COUNT) { + // If we succeeded, then we are in the second pass and have + // the results from the first. The results will depend on + // when the failure occurred in the previous pass. All we + // guarantee is that we will either have seen all + // the deletes or updates or none of them. + if (result2 != START_COUNT && result2 != END_COUNT) { + // System.err.println( "*Q1 " + autoCommit + " " + updates + " **" ); fail(testName + ": method did not throw exception but hits.length for search on term 'aaa' is " - + result2 + " instead of expected " + END_COUNT); + + result2 + " instead of expected " + START_COUNT + " or " + END_COUNT); } } else { // On hitting exception we still may have added Index: src/test/org/apache/lucene/index/TestConcurrentLogDocMergePolicy.java =================================================================== --- src/test/org/apache/lucene/index/TestConcurrentLogDocMergePolicy.java (revision 0) +++ src/test/org/apache/lucene/index/TestConcurrentLogDocMergePolicy.java (revision 0) @@ -0,0 +1,245 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +import java.io.IOException; + +import junit.framework.TestCase; + +public class TestConcurrentLogDocMergePolicy extends TestCase { + + MockConcurrentIndexMerger merger; + LogDocMergePolicy logDocMergePolicy; + ConcurrentMergePolicy policy; + + void set( String sizes ) { + merger = new MockConcurrentIndexMerger ( sizes ); + logDocMergePolicy = new LogDocMergePolicy(); + policy = new ConcurrentMergePolicy( logDocMergePolicy ); + merger.setMergePolicy( policy ); + logDocMergePolicy.setMaxMergeDocs(100000); + } + + void merge( String before, String after, int mergeFactor, int minMergeDocs ) + throws CorruptIndexException, IOException { + set( before ); + merger.setMaxBufferedDocs( minMergeDocs ); + logDocMergePolicy.setMergeFactor( mergeFactor ); + merger.merge(); + MockConcurrentIndexMerger other = new MockConcurrentIndexMerger( after ); + // System.err.println( "> " + other.toString() ); + // System.err.println( "< " + merger.toString() ); + policy.mergeWait(); + assertEquals( other.toString(), merger.toString() ); + } + + void merge( String before, String after, int mergeFactor ) + throws CorruptIndexException, IOException { + merge( before, after, mergeFactor, IndexWriter.DEFAULT_MAX_BUFFERED_DOCS ); + + } + + void merge( String before, String after ) + throws CorruptIndexException, IOException { + merge( before, + after, + LogDocMergePolicy.DEFAULT_MERGE_FACTOR, + IndexWriter.DEFAULT_MAX_BUFFERED_DOCS ); + } + + public void _testManyLevelMerge() + throws CorruptIndexException, IOException { + + set ("100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100"); + logDocMergePolicy.setMergeFactor( 2 ); + merger.merge(); + policy.mergeWait(); + System.err.println( "< " + merger.toString() ); + assertEquals( merger.toString(), "[0]9200" ); + + } + + public void _testManyLevelOptimize() + throws CorruptIndexException, IOException { + + set ("100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100"); + logDocMergePolicy.setMergeFactor( 2 ); + merger.optimize(); + policy.mergeWait(); + System.err.println( "< " + merger.toString() ); + assertEquals( merger.toString(), "[0]c9200" ); + + } + + public void _testMultipleThreads() + throws CorruptIndexException, IOException { + + set ("1000 1000"); + logDocMergePolicy.setMergeFactor( 2 ); + merger.merge(); + MockIndexMerger other = new MockIndexMerger( "1000 1000" ); + merger.add( other ); + merger.merge(); + policy.mergeWait(); + assertEquals( merger.toString(), "[0]c4000" ); + + } + + public void _testClose() + throws CorruptIndexException, IOException { + + set ("1000 1000"); + logDocMergePolicy.setMergeFactor( 2 ); + merger.merge(); + MockIndexMerger other = new MockIndexMerger( "1000 1000" ); + merger.add( other ); + merger.merge(); + policy.close(); + System.err.println( "< !!! 
" + merger.toString() ); + assertEquals( merger.toString(), "[0]c4000" ); + + } + + public void _testParallelMerges() + throws CorruptIndexException, IOException { + + set ("1000 1000 1000 1000"); + logDocMergePolicy.setMergeFactor( 2 ); + merger.merge(); + MockIndexMerger other = new MockIndexMerger( "1000 1000" ); + merger.add( other ); + merger.merge(); + policy.mergeWait(); + assertEquals( "[0]c4000 [1]c2000", merger.toString() ); + + } + + public void _testConcurrentMerge() + throws CorruptIndexException, IOException { + + merge( "1 1 1 1 1 1 1 1 1 1", "10" ); + merge( "100 100 100 100 100 100 100 100 100 100", "1000" ); + + } + + public void _testConsistentMerge() + throws CorruptIndexException, IOException { + + MockConcurrentIndexMerger other; + + merge( "0 10", "0 10" ); + merge( "1000 10 0 10", "1000 10 10", 4, 4 ); + merge( "1000 0 0 10 0 0 10", "1000 10 0 10", 4 ); + merge( "1000 100 10 1 1 1 1 1 1 1 1 1 1 1 1 1 1", "1000 100 19 1 1 1 1 1" ); + merge( "1000 100 10 0 0 0 0 0 0 0 0 0 0 0 0 0 0", "1000 100 10 0 0 0 0 0" ); + merge( "9 9 9 9 9 9 9 9 9 9","90" ); + merge( "10 10 10 10 10 10 10 10 10 10", "100" ); + merge( "100 100 100 100 100 100 100 100 100 100", "1000" ); + merge( "1000 10 x10 x10 x10", "1000 10 10 10 10" ); + merge( "1000 9 x9 x9 x9", "1000 9 9 9 9" ); + merge( "1000 10 x10 x10 x10", "1000 40", 4 ); + merge( "1000 9 x9 x9 x9", "1000 36", 4 ); + merge( "1000 0 10 0 10", "1000 20", 4 ); + + } + + public void _testInconsistentMerge() + throws CorruptIndexException, IOException { + + merge( "1000 100 10 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1000", "1117 1007" ); + merge( "1001 101 11 1 1 1 1 1 1 1 1 1 1 1 1 1 1 11", "1001 101 20 16" ); + + } + + public void _testChangeMergeFactor() + throws CorruptIndexException, IOException { + + MockConcurrentIndexMerger other; + + set( "10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10" ); + logDocMergePolicy.setMergeFactor( 5 ); + merger.merge(); + other = new MockConcurrentIndexMerger( "250 10" ); + policy.mergeWait(); + assertEquals( other.toString(), merger.toString() ); + + set( "0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 11" ); + logDocMergePolicy.setMergeFactor( 5 ); + merger.merge(); + other = new MockConcurrentIndexMerger( "11" ); + policy.mergeWait(); + assertEquals( other.toString(), merger.toString() ); + + set( "0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 10" ); + logDocMergePolicy.setMergeFactor( 5 ); + merger.merge(); + other = new MockConcurrentIndexMerger( "0 10" ); + policy.mergeWait(); + assertEquals( other.toString(), merger.toString() ); + } + + public void testOptimize() + throws CorruptIndexException, IOException { + + MockConcurrentIndexMerger other; + + set ( "1000 100 10 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1000" ); + + other = new MockConcurrentIndexMerger( "2124" ); + merger.optimize(); + policy.mergeWait(); + assertEquals( other.toString(), merger.toString() ); + + set ( "x1000" ); + + other = new MockConcurrentIndexMerger( "1000" ); + merger.optimize(); + policy.mergeWait(); + assertEquals( other.toString(), merger.toString() ); + + set ( "1000 100 10 1" ); + + other = new MockConcurrentIndexMerger( "1111" ); + merger.optimize(); + policy.mergeWait(); + assertEquals( other.toString(), merger.toString() ); + + } + + public void _testDirs() + throws CorruptIndexException, IOException { + + MockConcurrentIndexMerger other; + + set ( "100 10 1" ); + + other = new MockConcurrentIndexMerger( "100 10 1" ); + assertEquals( other.toString(), merger.toString() ); + + other = new 
MockConcurrentIndexMerger( "100 x10 1" ); + assertTrue( other.toString() != merger.toString() ); + + set ( "100 x10 1" ); + merger.merge(); + + other = new MockConcurrentIndexMerger( "100 10 1" ); + assertEquals( other.toString(), merger.toString() ); + + } + +} Property changes on: src/test/org/apache/lucene/index/TestConcurrentLogDocMergePolicy.java ___________________________________________________________________ Name: svn:eol-style + native Index: src/test/org/apache/lucene/index/TestDocumentWriter.java =================================================================== --- src/test/org/apache/lucene/index/TestDocumentWriter.java (revision 567805) +++ src/test/org/apache/lucene/index/TestDocumentWriter.java (working copy) @@ -62,7 +62,7 @@ IndexWriter writer = new IndexWriter(dir, analyzer, true); writer.addDocument(testDoc); writer.flush(); - SegmentInfo info = writer.segmentInfos.info(writer.segmentInfos.size()-1); + SegmentInfo info = writer.newestSegment(); writer.close(); //After adding the document, we should be able to read it back in SegmentReader reader = SegmentReader.get(info); @@ -123,7 +123,7 @@ writer.addDocument(doc); writer.flush(); - SegmentInfo info = writer.segmentInfos.info(writer.segmentInfos.size()-1); + SegmentInfo info = writer.newestSegment(); writer.close(); SegmentReader reader = SegmentReader.get(info); @@ -156,7 +156,7 @@ writer.addDocument(doc); writer.flush(); - SegmentInfo info = writer.segmentInfos.info(writer.segmentInfos.size()-1); + SegmentInfo info = writer.newestSegment(); writer.close(); SegmentReader reader = SegmentReader.get(info); Index: src/test/org/apache/lucene/index/TestAddIndexesNoOptimize.java =================================================================== --- src/test/org/apache/lucene/index/TestAddIndexesNoOptimize.java (revision 567805) +++ src/test/org/apache/lucene/index/TestAddIndexesNoOptimize.java (working copy) @@ -272,7 +272,8 @@ writer.addIndexesNoOptimize(new Directory[] { aux, aux }); assertEquals(1020, writer.docCount()); - assertEquals(2, writer.getSegmentCount()); + /* note this is different than the 2.2 merge because of corner cases in the default policy */ + assertEquals(3, writer.getSegmentCount()); assertEquals(1000, writer.getDocCount(0)); writer.close(); Index: src/test/org/apache/lucene/index/TestIndexModifier.java =================================================================== --- src/test/org/apache/lucene/index/TestIndexModifier.java (revision 567805) +++ src/test/org/apache/lucene/index/TestIndexModifier.java (working copy) @@ -193,7 +193,7 @@ } public int docFreq(Term term) throws IOException { synchronized(directory) { - assureOpen(); + ensureOpen(); createIndexReader(); return indexReader.docFreq(term); } Index: src/test/org/apache/lucene/index/MockConcurrentIndexMerger.java =================================================================== --- src/test/org/apache/lucene/index/MockConcurrentIndexMerger.java (revision 0) +++ src/test/org/apache/lucene/index/MockConcurrentIndexMerger.java (revision 0) @@ -0,0 +1,38 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.lang.Thread; +import java.lang.InterruptedException; + +class MockConcurrentIndexMerger extends MockIndexMerger { + + MockConcurrentIndexMerger( String string ) { + super( string ); + } + + protected void merge( SegmentInfo segment ) { + try { + Thread.sleep( segment.docCount/10 ); + } catch ( InterruptedException ie ) { + } + } + +} + Property changes on: src/test/org/apache/lucene/index/MockConcurrentIndexMerger.java ___________________________________________________________________ Name: svn:eol-style + native Index: src/java/org/apache/lucene/index/LogDocMergePolicy.java =================================================================== --- src/java/org/apache/lucene/index/LogDocMergePolicy.java (revision 0) +++ src/java/org/apache/lucene/index/LogDocMergePolicy.java (revision 0) @@ -0,0 +1,750 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.Vector; + +/** + * Logarithmic merge policy + * + */ + +/* Logarithmic merge policy + * + * The LMP maintains segments as increasing (from the right) groups of + * segments where the number of documents in each segment increases + * exponentially. + * + * The LMP has three parameters: + * + * minMergeDocs: this is the lowest number of documents in a segment (comes from IndexWriter) + * mergeFactor: this is the base of the logarithm + * maxMergeDocs: this is used to compute the bound on the segment size + * + * Segments are conceptually assigned a level based on the number of + * documents they contain: + * + * level = ceiling(logM(ceiling(docs / minMergeDocs))) + * + * logM: log-base mergeFactor + * docs = number of documents in the segment + * + * In the simple case, where no deletes are occurring, when a new + * segment of level N is created and M-1 segments already exist at + * that level, a new merge will be initiated to replace the now M + * segments at level N with a new segment at level N+1. + * + * Notes: + * + * The number of documents does not (currently) consider latent + * deleted documents. 
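(As an illustration, not part of the patch: the level computation described above can be written as a standalone sketch. The helper name and signature are hypothetical; it simply restates level = ceiling(logM(ceiling(docs / minMergeDocs))).)

// Illustrative sketch only; not in the patch. Computes the conceptual level of a
// segment, assuming minMergeDocs >= 1 and mergeFactor >= 2.
static int segmentLevel(int docs, int minMergeDocs, int mergeFactor) {
  int units = (docs + minMergeDocs - 1) / minMergeDocs;  // ceiling(docs / minMergeDocs)
  int level = 0;
  long bound = 1;
  while (bound < units) {     // smallest level with mergeFactor^level >= units
    bound *= mergeFactor;
    level++;
  }
  return level;               // e.g. docs=1000, minMergeDocs=10, mergeFactor=10 -> 2
}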
+ * + * The number of documents that a merge produces depends on the + * latent deleted documents in the segments making up the + * merge, so a merge may result in a segment list that + * does not maintain the desired logarithmic invariant. + * + */ + +class LogDocMergePolicy implements MergePolicy { + + /** + * Default value is 10. Change using {@link #setMergeFactor(int)}. + */ + public final static int DEFAULT_MERGE_FACTOR = 10; + + /** + * Default value is {@link Integer#MAX_VALUE}. Change using {@link #setMaxMergeDocs(int)}. + */ + public final static int DEFAULT_MAX_MERGE_DOCS = Integer.MAX_VALUE; + + /** Use compound file setting. Defaults to true, minimizing the number of + * files used. Setting this to false may improve indexing performance, but + * may also cause file handle problems. + */ + private boolean useCompoundFile = true; + + public LogDocMergePolicy() { + } + + public void close() {} + + /** Setting to turn on usage of a compound file. When on, multiple files + * for each segment are merged into a single file once the segment creation + * is finished. This is done regardless of what directory is in use. + */ + public void setUseCompoundFile(boolean useCompoundFile) { + this.useCompoundFile = useCompoundFile; + } + + /** See {@link #setUseCompoundFile(boolean)} + */ + public boolean getUseCompoundFile() { + return useCompoundFile; + } + + /** Called by writers/mergers to determine whether newly created + * segments should be in compound file format or not. + */ + public boolean useCompoundFile(SegmentInfos segments, + SegmentInfo newSegment) { + return useCompoundFile; + } + + /** Called by writers/mergers to determine whether newly created + * doc store files should be in compound file format or not. + */ + public boolean useCompoundDocStore(SegmentInfos segments) { + return useCompoundFile; + } + + private int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS; + + /** Determines the largest number of documents ever merged by addDocument(). + * Small values (e.g., less than 10,000) are best for interactive indexing, + * as this limits the length of pauses while indexing to a few seconds. + * Larger values are best for batched indexing and speedier searches. + * + *

The default value is {@link Integer#MAX_VALUE}. + */ + public void setMaxMergeDocs(int maxMergeDocs) { + if (maxMergeDocs < 2) + throw new IllegalArgumentException("maxMergeDocs must at least be 2"); + this.maxMergeDocs = maxMergeDocs; + } + + /** See {@link #setMaxMergeDocs(int)} + */ + public int getMaxMergeDocs() { + return maxMergeDocs; + } + + private int mergeFactor = DEFAULT_MERGE_FACTOR; + + /** Determines how often segment indices are merged during addDocument(). With + * smaller values, less RAM is used while indexing, and searches on + * unoptimized indices are faster, but indexing speed is slower. With larger + * values, more RAM is used during indexing, and while searches on unoptimized + * indices are slower, indexing is faster. Thus larger values (> 10) are best + * for batch index creation, and smaller values (< 10) for indices that are + * interactively maintained. + * + *

This must never be less than 2. The default value is 10. + */ + public void setMergeFactor(int mergeFactor) { + if (mergeFactor < 2) + throw new IllegalArgumentException("mergeFactor must at least be 2"); + this.mergeFactor = mergeFactor; + } + + /** + * @see #setMergeFactor + */ + public int getMergeFactor() { + return mergeFactor; + } + + /* a ConsistencyState object wraps the merge invariants: + * + * 1) that the list of segments, viewed from the end, is a + * monotonically increasing in level (the log of the number of + * documents (not including deleted documents)). + * + * 2) All segments are owned by the destination directory. + * + * Inconsistencies are measured in three ways: + * + * lowestConsistentBound: if not zero, it is the lowest logarithmic + * level above which logarithmic monotonicity holds. E.g., (assuming + * minMergeDocs = mergeFactor = 10 + * LCB(100 10 1) == 0 + * LCB(100 1 10) == 100 + * LCB(1 10 100) == 1000 + * + * firstInconsistentSegment: when the invariants aren't met, the + * firstInconsistentSegment is the index of, well, the first + * inconsistent segment, e.g., (same givens as above) + * FIS(100 1 10) = 1 + * + * inconsistentDirectories: a list of directories not contained by + * the merge which need to be copied if not otherwise merged + * + * Notes: + * + * Consistency in this context does not include checking that the number of segments at a level is <= M + * but the merge policy will still consolidate those. + * + */ + + class ConsistencyState { + + Vector inconsistentDirectories = new Vector(); + int lowestConsistentBound = 0; + int firstInconsistentSegment = -1; + + ConsistencyState(SegmentInfos segmentInfos, IndexMerger merger) { + + int previousBound = -1; + int currentBound = merger.getMaxBufferedDocs(); + + /* synchronized(segmentInfos) */ { + + for (int i = segmentInfos.size()-1; i >=0; i--) { + + SegmentInfo info = segmentInfos.info(i); + + if (info.dir != merger.getDirectory()) { + inconsistentDirectories.add(0, new Integer(i)); + } + + int docCount = info.docCount; + + if (docCount <= previousBound) { + lowestConsistentBound = currentBound; + firstInconsistentSegment = i; + } + + while (docCount > currentBound) { + + previousBound = currentBound; + currentBound *= mergeFactor; + + if (currentBound > maxMergeDocs) { + throw new IllegalArgumentException("No segment size can exceed maxMergeDocs"); + } + } + + } + + } + + } + + boolean isConsistent() { + return lowestConsistentBound == 0 && inconsistentDirectories.isEmpty(); + } + + } + + int lowestConsistentBound(SegmentInfos segmentInfos, IndexMerger merger) { + return new ConsistencyState(segmentInfos, merger).lowestConsistentBound; + } + + protected int merge(MergeSpecification spec, IndexMerger merger) + throws MergeException, CorruptIndexException, IOException { + int docCount = merger.merge(spec); + return docCount; + } + + SegmentInfos segmentInfos(SegmentInfos all, int first, int last) { + SegmentInfos subset = new SegmentInfos(); + /* synchronized(all) */ { + subset.addAll(all.subList(first, last)); + } + return subset; + } + + static SegmentInfos copy(SegmentInfos segmentInfos) { + SegmentInfos result = new SegmentInfos(); + /* synchronized(segmentInfos) */ { + result.addAll(segmentInfos); + } + return result; + } + + /* The primary method for handling merges is cascadingMerge. It + * takes a consistent list of segments and generates primitive + * merges until the logarithmic invariants are met. 
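(As a reading aid, not part of the patch: the bound climbing that ConsistencyState above performs, and that cascadingMerge below repeats, can be condensed into the following standalone sketch. The method name and int[] signature are hypothetical.)

// Illustrative sketch only; not in the patch. Mirrors ConsistencyState's scan from
// the right, ignoring maxMergeDocs and directory checks. Returns 0 when the list
// already satisfies the logarithmic invariant.
static int lowestConsistentBound(int[] docCounts, int maxBufferedDocs, int mergeFactor) {
  int previousBound = -1;
  int currentBound = maxBufferedDocs;
  int lcb = 0;
  for (int i = docCounts.length - 1; i >= 0; i--) {
    int docCount = docCounts[i];
    if (docCount <= previousBound) {
      lcb = currentBound;            // a segment dropped back below an earlier level
    }
    while (docCount > currentBound) {
      previousBound = currentBound;  // climb until the bound can hold this segment
      currentBound *= mergeFactor;
    }
  }
  // e.g. {10000, 1000, 100, 10, 1, 100, 10, 1} with maxBufferedDocs=10 and
  // mergeFactor=10 yields 100, matching the expectation in TestLogDocMergePolicy above.
  return lcb;
}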
+ * + * This function takes two parameters that override the "default" + * behaviour: + * + * firstBound: this parameter effectively overrides minMergeDocs for + * one call, the result being that all segments with less than + * firstBound docs will be considered to be at the same level. This + * behaviour is used when making an inconsistent index consistent: + * cascadingMerge is called with firstBound = lowestConsistentBound. + * + * firstSegmentToConsider: this is used to manually set the left + * boundary of the subsequence. It also causes a merge even if the + * number of segments that would be merged is less than + * mergeFactor. It is used as part of the process of making an index + * consistent. + * + * Differences from the non-factored version: + * + * Processing of non-minimum sized segments: this function, rather + * than starting at minimum sized segments and stopping if none are + * found, looks at the lowest level found and checks it, stopping if + * it is okay. This is designed to complement the consistency checker + * which handles lists where levels are not contiguous but allows + * segments which are over populated (too big) but logarithmically + * monotonic. + * + * Notes on concurrency: if the concurrent merge work goes forward, + * this function needs to be changed structurally. When concurrent + * merges can occur, segmentInfos can change over time and in a way + * predictable in the current thread. In particular, during a merge, + * segmentInfos can change a lot and thus the loop below which + * expects to do multiple merges based on data gleaned before + * beginning the merges probably needs to change. + */ + + public void cascadingMerge(SegmentInfos segmentInfos, + IndexMerger merger, + int firstBound, + int firstSegmentToConsider) + throws MergeException, CorruptIndexException, IOException { + + long previousBound = -1; + long currentBound = firstBound; + + assert currentBound != 0; + + /* Determine the bounds for the smallest existing segment */ + + int firstCount = segmentInfos.info(segmentInfos.size()-1).docCount; + while (firstCount > currentBound) { + previousBound = currentBound; + currentBound *= mergeFactor; + } + + /* find the contiguous subsequence of segments on this level, + * searching from the right */ + + while (currentBound <= maxMergeDocs) { + + /* starts like this: */ + /* abcdefg */ + /*r l*/ + /* and iterates by pulling the left pointer towards the left */ + + int left = segmentInfos.size(); + int right = -1; + + /* search from the right ... */ + + while(--left >= 0) { + + int docCount = segmentInfos.info(left).docCount; + + /* first time a segment is within the bound, set right */ + + if (right == -1 && docCount > previousBound && docCount <= currentBound) { + right = left; + } + + /* first time a segment is above the bound, stop, leaving left where it is + * else continue, which pulls left towards the left */ + + if (docCount > currentBound) { + break; + } + + } + + int first = left + 1; + + if (firstSegmentToConsider >= 0) { + first = firstSegmentToConsider; + } + + int length = right - first + 1; + + /* intoNextLevel is used to determine how to handle the results + * of a merge. If the merge results in a segment in the next + * level, a cascade will ensue. Otherwise, the merge has created + * a segment at the same level as the previous segments and is a + * candidate for further merging at that level.
+ */ + + boolean intoNextLevel = false; + + MergeSpecification spec = new MergeSpecification(); + spec.useCompoundFile = useCompoundFile; + + while (length > 0) { + + int mergeLength = mergeFactor; + + /* Normally, if the length of the subsequence is less than + * mergeFactor, no merge will occur. This is modified if first + * has been overridden, as described above. + */ + + if (length < mergeLength) { + if (firstSegmentToConsider == -1) { + break; + } + mergeLength = length; + } + + int last = first + mergeLength; + + /* Create the subsequence. Note this creates a new segmentInfos + * (so it's not == to the one owned by the merger). This + * should help in concurrency but is not alone sufficient. + */ + + spec.segments = segmentInfos(segmentInfos, first, last); + + int docCount = 0; + + docCount = merge(spec, merger); + + length -= mergeLength; + + firstSegmentToConsider = -1; + + if (docCount == -1) { + + /* If the merge ends up being concurrent, we don't know the + * number of documents that will result. So we just look at + * the rest of the segments. Note, though, that the bump by + * mergeLength assumes that segmentInfos hasn't been updated + * before this code is run, which is not safe. That's an + * example of where this needs to be tweaked for + * concurrency. Partly this could be handled by going to a + * recursive rather than iterative structure. But there + * are other issues that need to be explored / understood ... + */ + + first += mergeLength; + + } else if (docCount > currentBound) { + + /* If the new segment is into the next level, record that + * fact and continue with other segments at this level. */ + + first++; + intoNextLevel = true; + + } else { + + /* The new segment is still in the current level, so just + * add it back to this list of segments on this level */ + + length++; + + } + + } + + /* At this point, there aren't mergeFactor segments left to be + * merged at this level. If at some point we created a segment + * at the next level, we need to check it, so we'll fall through + * the break. Otherwise, the break fires, indicating that no + * further merges are needed to ensure consistency (as long as + * we were already consistent). + */ + if (!intoNextLevel) { + break; + } + + previousBound = currentBound; + currentBound *= mergeFactor; + + } + + } + + public void cascadingMerge(SegmentInfos segmentInfos, + IndexMerger merger, + int firstBound) + throws MergeException, CorruptIndexException, IOException { + cascadingMerge(segmentInfos, merger, firstBound, /* firstSegmentToConsider = */ -1); + } + + public void cascadingMerge(SegmentInfos segmentInfos, IndexMerger merger) + throws MergeException, CorruptIndexException, IOException { + cascadingMerge(segmentInfos, + merger, + /* firstBound = */ merger.getMaxBufferedDocs(), + /* firstSegmentToConsider = */ -1); + } + + public boolean isConsistent(SegmentInfos segmentInfos, + IndexMerger merger) { + return ((new ConsistencyState(segmentInfos, merger)).isConsistent()); + } + + /* Use the merger to copy a sequence of segments */ + void copy(SegmentInfos segmentInfos, + IndexMerger merger, + int first, + int last) + throws MergeException, CorruptIndexException, IOException { + + MergeSpecification spec = new MergeSpecification(); + spec.segments = segmentInfos(segmentInfos, first, last); + spec.useCompoundFile = useCompoundFile; + + merge(spec, merger); + + } + + /* This is the "alternative" to cascadingMerge. It is used to take + * an inconsistent list of segments and make them consistent.
It + * uses cascadingMerge and copy to do this. + * + * This code is based closely on the non-factored version. + * + * Differences from the non-factored version: + * + * In the non-factored version, IndexWriter#addIndexesNoOptimize + * has most of the code for making an index consistent. It's + * necessary in that function since its purpose is to aggregate a + * number of indexes, in which case it's highly unlikely the + * resulting sequence will follow the logarithmic + * invariants. However, there are other reasons an index will not be + * consistent. It may well not be consistent if parameters + * change. Deletes can also cause merges that result in + * inconsistency (I think). + * + * makeConsistent is supposed to take an inconsistent list and make + * it consistent. It does this without regard for what caused the + * inconsistency. Because it makes fewer assumptions, the algorithm + * is slightly different than addIndexesNoOptimize and the results + * can be different in a few cases (though the invariants will still + * hold). + */ + + boolean makeConsistent(SegmentInfos segmentInfos, IndexMerger merger) + throws MergeException, CorruptIndexException, IOException { + + ConsistencyState state = new ConsistencyState(segmentInfos, merger); + + if (state.isConsistent()) { + return false; + } + + /* If the segment list is not composed of contiguous subsequences + * at monotonically increasing levels, use cascadingMerge with + * firstBound == the lowest consistent bound. This will divide + * the list into two pieces: a (possibly empty) consistent set on + * the left and an inconsistent set on the right, e.g., + * + * 1000 1000 10 100 10 + * < con > < incon > + * + * Starting with the left-most inconsistent segment, + * cascadingMerge will merge mergeFactor segments at a time using + * its normal algorithm for the given level. + */ + + if (state.lowestConsistentBound > 0) { + cascadingMerge(segmentInfos, merger, state.lowestConsistentBound); + state = new ConsistencyState(segmentInfos, merger); + } + + /* cascadingMerge will stop when there are fewer than + * mergeFactor segments, but there may still be inconsistency in the last + * segments, of which there may be up to mergeFactor - 1 segments. + * For example, we could be left with + * + * 1000 100 100 10 100 + * + * with M = 4. Since the first inconsistency is only one from the end, + * it will not get merged. + * + * We use cascadingMerge again, but force it to merge these + * segments by specifying the firstInconsistentSegment. + */ + + if (state.lowestConsistentBound > 0) { + cascadingMerge(segmentInfos, + merger, + state.lowestConsistentBound, + state.firstInconsistentSegment); + } + + /* All of these merges will (should?) have left the sequence + * consistent, but there may be further possible merges. Try once more. */ + + cascadingMerge(segmentInfos, merger); + + state = new ConsistencyState(segmentInfos, merger); + + /* Finally, it's possible this merger doesn't even own some of + * these segments. If all of the earlier merges left some external + * segments untouched, copy them in unchanged (except for garbage + * collected deleted docs). */ + + if (state.inconsistentDirectories.size() > 0) { + + for(int i = 0; i < state.inconsistentDirectories.size(); i++) { + + int segment = ((Integer)(state.inconsistentDirectories.elementAt(i))).intValue(); + copy(segmentInfos, merger, segment, segment+1); + } + + /* The copy could have generated segments that, via deleted doc + * gc, no longer obey the invariants. So just try the whole + * thing again.
This might be overkill? Or maybe we should + * actually be doing this in more cases? + */ + + makeConsistent(segmentInfos, merger); + + } + + return true; + + } + + public void checkedMerge(SegmentInfos segmentInfos, + IndexMerger merger) + throws MergeException, CorruptIndexException, IOException { + + assert segmentInfos.size() > 0; + + /* Since any merge can result in inconsistency, we have to run + * makeConsistent aftewards anyway. Would it be better to combine + * the code and put it in a while loop? Would that be too much + * extra computation? Not sure yet ... + */ + + if (!makeConsistent(segmentInfos, merger)) { + cascadingMerge(segmentInfos, merger); + makeConsistent(segmentInfos, merger); + } + + } + + /** + *

The amount of free space required when a merge is + * triggered is up to 1X the size of all segments being + * merged, when no readers/searchers are open against the + * index, and up to 2X the size of all segments being + * merged when readers/searchers are open against the + * index (see {@link #optimize()} for details). Most + * merges are small (merging the smallest segments + * together), but whenever a full merge occurs (all + * segments in the index, which is the worst case for + * temporary space usage) then the maximum free disk space + * required is the same as {@link #optimize}.

+ */ + public void merge(SegmentInfos segmentInfos, + IndexMerger merger) + throws CorruptIndexException, IOException { + + try { + checkedMerge(segmentInfos, merger); + } catch (MergeException me) { + throw new RuntimeException ("merge exception"); + } + } + + public void checkedOptimize(SegmentInfos segmentInfos, + IndexMerger merger) + throws MergeException, CorruptIndexException, IOException { + + if (segmentInfos.size() > 0) { + + while ((segmentInfos.size() > 1) || + SegmentReader.hasDeletions(segmentInfos.info(0)) || + SegmentReader.hasSeparateNorms(segmentInfos.info(0)) || + (segmentInfos.info(0).dir != merger.getDirectory()) || + (useCompoundFile && (!SegmentReader.usesCompoundFile(segmentInfos.info(0))))) { + + MergeSpecification spec = new MergeSpecification(); + + int first = segmentInfos.size() - mergeFactor; + first = first < 0 ? 0 : first; + int last = segmentInfos.size(); + + spec.segments = segmentInfos(segmentInfos, first, last); + spec.useCompoundFile = useCompoundFile; + + merge(spec, merger); + + } + + } + + } + + public void optimize(SegmentInfos segmentInfos, + IndexMerger merger) + throws CorruptIndexException, IOException { + try { + checkedOptimize(segmentInfos, merger); + } catch (MergeException me) { + throw new RuntimeException (me); + } + } + + static public String segString(SegmentInfos segmentInfos, + int first, + int beyond, + IndexMerger merger) { + + StringBuffer buffer = new StringBuffer(); + + for(int i = first; i < beyond; i++) { + + if (i > first) { + buffer.append(' '); + } + + buffer.append('['); + buffer.append(i); + buffer.append(']'); + + SegmentInfo info = segmentInfos.info(i); + + try { + if (info.getUseCompoundFile()) { + buffer.append('c'); + } else { + buffer.append('C'); + } + } catch (Exception e) { + } + + if (info.dir != merger.getDirectory()) { + buffer.append('x'); + } + + buffer.append(info.docCount); + + } + + return buffer.toString(); + + } + + public String segString(SegmentInfos segmentInfos, + IndexMerger merger, + int first, + int beyond) { + + return segString(segmentInfos, merger, first, beyond); + } + + static public String segString(SegmentInfos segmentInfos, + IndexMerger merger) { + return segString(segmentInfos, 0, segmentInfos.size(), merger); + } + + static public String segString(MergeSpecification spec, IndexMerger merger) { + return segString(spec.segments, merger); + } + +} Property changes on: src/java/org/apache/lucene/index/LogDocMergePolicy.java ___________________________________________________________________ Name: svn:eol-style + native Index: src/java/org/apache/lucene/index/ConcurrentMergePolicy.java =================================================================== --- src/java/org/apache/lucene/index/ConcurrentMergePolicy.java (revision 0) +++ src/java/org/apache/lucene/index/ConcurrentMergePolicy.java (revision 0) @@ -0,0 +1,392 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.store.Directory; + +import java.util.Map; +import java.util.HashMap; + +/* + * This class wraps a serial merger policy but potentially runs + * multiple, non-conflicting in threads. + * + * To be considered: + * + * May want to bound the number of threads. + * + * May want to prioritize threads. + * + * It's possible that a merge may start in a thread, and then become + * unecessary because of a rollback. Perhaps this could be checked by + * periodically checking that the segments being merged are still + * + * At the completion of a thread, more merges may become possible, + * e.g., a cascade. In the serial case, this is done in a loop. Should + * the CLMP call back to the merger to request another merge? + * + * more to be thought of ... + */ + +class ConcurrentMergePolicy implements MergePolicy, IndexMerger { + + class Conflict extends MergeException {} + + MergePolicy serialPolicy; + IndexMerger merger; + + public ConcurrentMergePolicy(MergePolicy serialPolicy) { + this.serialPolicy = serialPolicy; + } + + Map reserved = new HashMap(); + + public void merge(SegmentInfos segmentInfos, + IndexMerger merger) + throws CorruptIndexException, IOException { + saveMerger(merger); + boolean successful = false; + while (!successful) { + try { + serialPolicy.checkedMerge(segmentInfos,this); + successful = true; + } catch (Conflict conflict) { + conflictWait(); + } catch (MergeException me) { + throw new RuntimeException (me); + } + } + } + + public void optimize(SegmentInfos segmentInfos, + IndexMerger merger) + throws CorruptIndexException, IOException { + saveMerger(merger); + boolean successful = false; + while (!successful) { + try { + serialPolicy.checkedOptimize(segmentInfos,this); + successful = true; + } catch (Conflict conflict) { + conflictWait(); + } catch (MergeException me) { + throw new RuntimeException (me); + } + } + } + + public void close() { + mergeWait(); + serialPolicy.close(); + } + + public boolean useCompoundFile(SegmentInfos segments, + SegmentInfo newSegment) { + return serialPolicy.useCompoundFile(segments, newSegment); + } + + public boolean useCompoundDocStore(SegmentInfos segments) { + return serialPolicy.useCompoundDocStore(segments); + } + + void saveMerger(IndexMerger merger) { + if (this.merger == null) { + this.merger = merger; + } else if (this.merger != merger) { + throw new IllegalArgumentException("inconsistent merger passed"); + } + } + + public static final int DEFAULT_MIN_CONCURRENT_MERGE = 100; + + private int minConcurrentMerge = DEFAULT_MIN_CONCURRENT_MERGE; + + public void setMinConcurrentMerge(int minConcurrentMerge) { + this.minConcurrentMerge = minConcurrentMerge; + } + + public int getMinConcurrentMerge() { + return minConcurrentMerge; + } + + int numberOfDocuments (SegmentInfos segmentInfos) { + int number = 0; + for (int i = 0; i < segmentInfos.size(); i++) { + number += segmentInfos.info(i).docCount; + } + return number; + } + + boolean isMergeConcurrent(MergeSpecification spec) + throws CorruptIndexException, IOException { + + if (numberOfDocuments(spec.segments) < 
minConcurrentMerge) { + return false; + } + + return true; + + } + + class Reservation { + MergeSpecification spec; + + Reservation(MergeSpecification spec) { + this.spec = spec.copy(); + } + + void close() { + release(spec); + } + } + + Reservation reserve(MergeSpecification spec) { + + System.err.println("reserve " + segString(spec)); + + synchronized (reserved) { + + for (int segment = 0; segment < spec.segments.size(); segment++) { + if (reserved.containsKey(spec.segments.info(segment))) { + System.err.println("conflict of " + spec.segments.info(segment).name); + return null; + } + } + + for (int segment = 0; segment < spec.segments.size(); segment++) { + reserved.put(spec.segments.info(segment).name, spec.segments.info(segment)); + } + } + + return new Reservation(spec); + } + + void release(MergeSpecification spec) { + + synchronized (reserved) { + System.err.println("release " + segString(spec)); + for (int segment = 0; segment < spec.segments.size(); segment++) { + System.err.println("release " + spec.segments.info(segment).name); + reserved.remove(spec.segments.info(segment).name); + } + + reserved.notifyAll(); + } + + } + + class ConcurrentMerge implements Runnable { + + Reservation reservation; + + ConcurrentMerge(Reservation reservation) { + this.reservation = reservation; + } + + public void run() { + System.err.println("starting thread " + Thread.currentThread().getName() + " " + segString(reservation.spec.segments)); + try { + + MergeSpecification spec = reservation.spec; + + for (int segment = 0; segment < spec.segments.size(); segment++) { + System.err.println("owns " + Thread.currentThread().getName() + " " + spec.segments.info(segment)); + } + + merger.merge(reservation.spec); + } catch (MergeException me) { + throw new RuntimeException(me); + } catch (CorruptIndexException cie) { + throw new RuntimeException(cie); + } catch (IOException ioe) { + throw new RuntimeException(ioe); + } finally { + reservation.close(); + } + try { + merger.merge(); + } catch (IOException ioe) { + } + System.err.println("finishing thread " + Thread.currentThread().getName() + " " + segString(reservation.spec.segments)); + } + } + + public int merge(MergePolicy.MergeSpecification spec) + throws MergeException, CorruptIndexException, IOException { + + System.err.println("concurrent " + segString(spec)); + System.err.println(spec.segments); + System.err.println(spec.segments.info(0)); + + boolean isConcurrent = isMergeConcurrent(spec); + + Reservation reservation = null; + + int docCount = -1; + + try { + + if (isConcurrent && ((reservation = reserve(spec)) == null)) { + throw new Conflict(); + } + + if (isConcurrent) { + new Thread(new ConcurrentMerge(reservation)).start(); + reservation = null; + } else { + docCount = merger.merge(spec); + } + + } finally { + if (reservation != null) { + reservation.close(); + reservation = null; + } + } + + System.err.println("concurrent after " + segString(spec.segments)); + + return docCount; + } + + public Directory getDirectory() { + return merger.getDirectory(); + } + + public int getMaxBufferedDocs() { + return merger.getMaxBufferedDocs(); + } + + public double getRAMBufferSizeMB() { + return merger.getRAMBufferSizeMB(); + } + + public void optimize() + throws CorruptIndexException, IOException { + assert merger != null; + merger.optimize(); + } + + public void merge() + throws CorruptIndexException, IOException { + assert merger != null; + merger.merge(); + } + + /* + public static final int DEFAULT_MIN_CONCURRENT_MERGE = 100; + + void 
setMinConcurrentMerge(int minConcurrentMerge); + int getMinConcurrentMerge(); + */ + + void mergeWait() { + synchronized (reserved) { + while(!reserved.isEmpty()) { + try { + System.err.println("* waiting ... " + Thread.currentThread().getName()); + reserved.wait(); + System.err.println("* awake ... " + Thread.currentThread().getName()); + } catch (InterruptedException ie) { + } + } + } + } + + public void conflictWait() { + synchronized (reserved) { + try { + System.err.println("waiting ... " + Thread.currentThread().getName()); + reserved.wait(); + System.err.println("awake ... " + Thread.currentThread().getName()); + } catch (InterruptedException ie) { + } + } + } + + public void checkedMerge(SegmentInfos segmentInfos, + IndexMerger merger) + throws CorruptIndexException, IOException, MergeException { + saveMerger(merger); + serialPolicy.checkedMerge(segmentInfos,this); + } + + public void checkedOptimize(SegmentInfos segmentInfos, + IndexMerger merger) + throws CorruptIndexException, IOException, MergeException { + serialPolicy.checkedOptimize(segmentInfos,this); + } + + static public String segString(SegmentInfos segmentInfos, + int first, + int beyond, + IndexMerger merger) { + + StringBuffer buffer = new StringBuffer(); + + for(int i = first; i < beyond; i++) { + + if (i > first) { + buffer.append(' '); + } + + buffer.append('['); + buffer.append(i); + buffer.append(']'); + + SegmentInfo info = segmentInfos.info(i); + + try { + if (info.getUseCompoundFile()) { + buffer.append('c'); + } else { + buffer.append('C'); + } + } catch (Exception e) { + } + + if (info.dir != merger.getDirectory()) { + buffer.append('x'); + } + + buffer.append(info.docCount); + + } + + return buffer.toString(); + + } + + public String segString(SegmentInfos segmentInfos, + int first, + int beyond) { + + return segString(segmentInfos, first, beyond, merger); + } + + public String segString(SegmentInfos segmentInfos) { + return segString(segmentInfos, 0, segmentInfos.size()); + } + + public String segString(MergeSpecification spec) { + return segString(spec.segments); + } + +} Property changes on: src/java/org/apache/lucene/index/ConcurrentMergePolicy.java ___________________________________________________________________ Name: svn:eol-style + native Index: src/java/org/apache/lucene/index/IndexModifier.java =================================================================== --- src/java/org/apache/lucene/index/IndexModifier.java (revision 567805) +++ src/java/org/apache/lucene/index/IndexModifier.java (working copy) @@ -101,12 +101,25 @@ // Lucene defaults: protected PrintStream infoStream = null; - protected boolean useCompoundFile = true; + + private MergePolicy mergePolicy = new LogDocMergePolicy(); + + /** + * @deprecated + * @see LogDocMergePolicy#getUseCompoundFile(boolean) + */ + protected boolean useCompoundFile = getUseCompoundFile();; + protected int maxBufferedDocs = IndexWriter.DEFAULT_MAX_BUFFERED_DOCS; protected int maxFieldLength = IndexWriter.DEFAULT_MAX_FIELD_LENGTH; - protected int mergeFactor = IndexWriter.DEFAULT_MERGE_FACTOR; /** + * @deprecated + * @see: LogDocMergePolicy.DEFAULT_MERGE_FACTOR + */ + protected int mergeFactor = LogDocMergePolicy.DEFAULT_MERGE_FACTOR; + + /** * Open an index with write access. * * @param directory the index directory @@ -180,7 +193,7 @@ * Throw an IllegalStateException if the index is closed. 
* @throws IllegalStateException */ - protected void assureOpen() { + protected void ensureOpen() { if (!open) { throw new IllegalStateException("Index is closed"); } @@ -202,11 +215,10 @@ } indexWriter = new IndexWriter(directory, analyzer, false); indexWriter.setInfoStream(infoStream); - indexWriter.setUseCompoundFile(useCompoundFile); if (maxBufferedDocs != 0) indexWriter.setMaxBufferedDocs(maxBufferedDocs); indexWriter.setMaxFieldLength(maxFieldLength); - indexWriter.setMergeFactor(mergeFactor); + indexWriter.setMergePolicy(mergePolicy); } } @@ -218,6 +230,8 @@ protected void createIndexReader() throws CorruptIndexException, IOException { if (indexReader == null) { if (indexWriter != null) { + indexWriter.flush(); + indexWriter.releaseMergePolicy(); indexWriter.close(); indexWriter = null; } @@ -235,8 +249,10 @@ */ public void flush() throws CorruptIndexException, LockObtainFailedException, IOException { synchronized(directory) { - assureOpen(); + ensureOpen(); if (indexWriter != null) { + indexWriter.flush(); + indexWriter.releaseMergePolicy(); indexWriter.close(); indexWriter = null; createIndexWriter(); @@ -263,7 +279,7 @@ */ public void addDocument(Document doc, Analyzer docAnalyzer) throws CorruptIndexException, LockObtainFailedException, IOException { synchronized(directory) { - assureOpen(); + ensureOpen(); createIndexWriter(); if (docAnalyzer != null) indexWriter.addDocument(doc, docAnalyzer); @@ -307,7 +323,7 @@ */ public int deleteDocuments(Term term) throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException { synchronized(directory) { - assureOpen(); + ensureOpen(); createIndexReader(); return indexReader.deleteDocuments(term); } @@ -326,7 +342,7 @@ */ public void deleteDocument(int docNum) throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException { synchronized(directory) { - assureOpen(); + ensureOpen(); createIndexReader(); indexReader.deleteDocument(docNum); } @@ -341,7 +357,7 @@ */ public int docCount() { synchronized(directory) { - assureOpen(); + ensureOpen(); if (indexWriter != null) { return indexWriter.docCount(); } else { @@ -363,7 +379,7 @@ */ public void optimize() throws CorruptIndexException, LockObtainFailedException, IOException { synchronized(directory) { - assureOpen(); + ensureOpen(); createIndexWriter(); indexWriter.optimize(); } @@ -378,7 +394,7 @@ */ public void setInfoStream(PrintStream infoStream) { synchronized(directory) { - assureOpen(); + ensureOpen(); if (indexWriter != null) { indexWriter.setInfoStream(infoStream); } @@ -396,31 +412,50 @@ */ public PrintStream getInfoStream() throws CorruptIndexException, LockObtainFailedException, IOException { synchronized(directory) { - assureOpen(); + ensureOpen(); createIndexWriter(); return indexWriter.getInfoStream(); } } /** - * Setting to turn on usage of a compound file. When on, multiple files - * for each segment are merged into a single file once the segment creation - * is finished. This is done regardless of what directory is in use. 
- * @see IndexWriter#setUseCompoundFile(boolean) - * @throws IllegalStateException if the index is closed + * Set the merge policy used by this IndexModifier */ - public void setUseCompoundFile(boolean useCompoundFile) { - synchronized(directory) { - assureOpen(); + public void setMergePolicy(MergePolicy mp) throws CorruptIndexException, IOException { + ensureOpen(); + if (mergePolicy != null && mergePolicy != mp) { if (indexWriter != null) { - indexWriter.setUseCompoundFile(useCompoundFile); + indexWriter.flush(); + indexWriter.releaseMergePolicy(); } - this.useCompoundFile = useCompoundFile; + mergePolicy.close(); } + mergePolicy = mp; + if (indexWriter != null) { + indexWriter.setMergePolicy(mergePolicy); + } } /** - * @see IndexModifier#setUseCompoundFile(boolean) + * @see #setMergePolicy(MergePolicy) + */ + public MergePolicy getMergePolicy() { + return mergePolicy; + } + + + /** + * @deprecated + * @see LogDocMergePolicy#setUseCompoundFile(boolean) + * @throws IllegalStateException if the index is closed + */ + public void setUseCompoundFile(boolean value) { + ((LogDocMergePolicy) getMergePolicy()).setUseCompoundFile(value); + } + + /** + * @deprecated + * @see LogDocMergePolicy#getUseCompoundFile(boolean) * @throws CorruptIndexException if the index is corrupt * @throws LockObtainFailedException if another writer * has this index open (write.lock could not @@ -428,11 +463,7 @@ * @throws IOException if there is a low-level IO error */ public boolean getUseCompoundFile() throws CorruptIndexException, LockObtainFailedException, IOException { - synchronized(directory) { - assureOpen(); - createIndexWriter(); - return indexWriter.getUseCompoundFile(); - } + return ((LogDocMergePolicy) getMergePolicy()).getUseCompoundFile(); } /** @@ -451,7 +482,7 @@ */ public void setMaxFieldLength(int maxFieldLength) { synchronized(directory) { - assureOpen(); + ensureOpen(); if (indexWriter != null) { indexWriter.setMaxFieldLength(maxFieldLength); } @@ -469,7 +500,7 @@ */ public int getMaxFieldLength() throws CorruptIndexException, LockObtainFailedException, IOException { synchronized(directory) { - assureOpen(); + ensureOpen(); createIndexWriter(); return indexWriter.getMaxFieldLength(); } @@ -479,8 +510,7 @@ * Determines the minimal number of documents required before the buffered * in-memory documents are merging and a new Segment is created. * Since Documents are merged in a {@link org.apache.lucene.store.RAMDirectory}, - * large value gives faster indexing. At the same time, mergeFactor limits - * the number of files open in a FSDirectory. + * large value gives faster indexing. * *

The default value is 10. * @@ -490,7 +520,7 @@ */ public void setMaxBufferedDocs(int maxBufferedDocs) { synchronized(directory) { - assureOpen(); + ensureOpen(); if (indexWriter != null) { indexWriter.setMaxBufferedDocs(maxBufferedDocs); } @@ -508,36 +538,26 @@ */ public int getMaxBufferedDocs() throws CorruptIndexException, LockObtainFailedException, IOException { synchronized(directory) { - assureOpen(); + ensureOpen(); createIndexWriter(); return indexWriter.getMaxBufferedDocs(); } } /** - * Determines how often segment indices are merged by addDocument(). With - * smaller values, less RAM is used while indexing, and searches on - * unoptimized indices are faster, but indexing speed is slower. With larger - * values, more RAM is used during indexing, and while searches on unoptimized - * indices are slower, indexing is faster. Thus larger values (> 10) are best - * for batch index creation, and smaller values (< 10) for indices that are - * interactively maintained. - *

This must never be less than 2. The default value is 10. - * - * @see IndexWriter#setMergeFactor(int) + * @deprecated + * @see LogDocMergePolicy#setMergeFactor(int) * @throws IllegalStateException if the index is closed */ public void setMergeFactor(int mergeFactor) { synchronized(directory) { - assureOpen(); - if (indexWriter != null) { - indexWriter.setMergeFactor(mergeFactor); - } - this.mergeFactor = mergeFactor; + ensureOpen(); + ((LogDocMergePolicy) getMergePolicy()).setMergeFactor(mergeFactor); } } /** + * @deprecated * @see IndexModifier#setMergeFactor(int) * @throws CorruptIndexException if the index is corrupt * @throws LockObtainFailedException if another writer @@ -547,9 +567,8 @@ */ public int getMergeFactor() throws CorruptIndexException, LockObtainFailedException, IOException { synchronized(directory) { - assureOpen(); - createIndexWriter(); - return indexWriter.getMergeFactor(); + ensureOpen(); + return ((LogDocMergePolicy) getMergePolicy()).getMergeFactor(); } } @@ -565,12 +584,17 @@ if (!open) throw new IllegalStateException("Index is closed already"); if (indexWriter != null) { + indexWriter.flush(); + indexWriter.releaseMergePolicy(); indexWriter.close(); indexWriter = null; } else { indexReader.close(); indexReader = null; } + if (mergePolicy != null) { + mergePolicy.close(); + } open = false; } } Index: src/java/org/apache/lucene/index/IndexMerger.java =================================================================== --- src/java/org/apache/lucene/index/IndexMerger.java (revision 0) +++ src/java/org/apache/lucene/index/IndexMerger.java (revision 0) @@ -0,0 +1,80 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.store.Directory; + +import java.io.IOException; + +/** + * The set of operations needed by a MergePolicy object + * to trigger individual primitive merge operations. Currently only + * implemented by IndexWriter. + */ + +interface IndexMerger { + + /** + * The primary primitive merge operation + * + * @param m specification of the desired merge + * @return number of documents in the new segment (or -1 if indeterminate) + */ + + int merge(MergePolicy.MergeSpecification m) + throws MergePolicy.MergeException, CorruptIndexException, IOException; + + /** + * Identifies the target directory of a merge so that the merge + * policy can determine if segments need to be copied between + * directories. + * + * @return the directory that new segments will be created within. + */ + Directory getDirectory(); + + /** + * Returns 0 if this writer is flushing by RAM usage, else + * returns the number of buffered added documents that will + * trigger a flush. 
+ * @see #setMaxBufferedDocs + */ + int getMaxBufferedDocs(); + + /** + * Returns 0.0 if this writer is flushing by document + * count, else returns the value set by {@link + * #setRAMBufferSizeMB}. + */ + double getRAMBufferSizeMB(); + + /** + * High-level optimize request; generally will result in one or more + * primiteve merge requests via the merge policy. + */ + void optimize() + throws CorruptIndexException, IOException; + + /** + * High-level merge request; generally will result in one or more + * primitive merge requests via the merge policy. + */ + void merge() + throws CorruptIndexException, IOException; + +} Property changes on: src/java/org/apache/lucene/index/IndexMerger.java ___________________________________________________________________ Name: svn:eol-style + native Index: src/java/org/apache/lucene/index/MergePolicy.java =================================================================== --- src/java/org/apache/lucene/index/MergePolicy.java (revision 0) +++ src/java/org/apache/lucene/index/MergePolicy.java (revision 0) @@ -0,0 +1,128 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +/** + * A merge policy determines the sequence of primitive merge + * operations to be used for overall merge and optimize + * operations. Each merge or optimize call on the merge policy results + * in zero or more primitive merge calls on an + * IndexMerger. The merge policy is called by + * IndexWriter with the current list of all segments and + * it, in turn, calls to an IndexMerger (typically the + * writer, again), with individual merge requests, each of which + * generates a single new segment from the contents of a subset of all + * segments. + */ + +/* Notes: a merge policy piggybacks on the synchronization of the + * caller (persuably the merger, e.g., IndexWriter). Non-concurrent + * mergers do little internal synchronization. + */ + +interface MergePolicy { + + /** + * A merge specification provides the information necessary to + * perform an individual primitive merge operation, resulting in a + * single new segment. The merge spec includes the subset of + * segments to be merged as well as various options such as whether + * the new segment should or should not be in compound format. + */ + + public static class MergeSpecification implements Cloneable { + /** + * The subset of segments to be included in the primitive merge. + */ + public SegmentInfos segments; + + /** + * Indicate the format to be used in the resulting segment. + */ + public boolean useCompoundFile; + + /** + * Publicly avaliable, correctly typed clone(). 
+ */ + public MergeSpecification copy() { + try { + MergeSpecification c = (MergeSpecification)super.clone(); + c.segments = (SegmentInfos)segments.clone(); + return c; + } catch (CloneNotSupportedException cnse) { + throw new RuntimeException ("clone not supported on a MergeSpecification subclass"); + } + } + } + + /** + * Merge a sequence of segments. The policy determines what set of + * primitive merge operations constitute a high-level merge. + * + * @param segmentInfos the total set of segments in the index + */ + void merge(SegmentInfos segmentInfos, + IndexMerger merger) + throws CorruptIndexException, IOException; + + /** + * Optimize a sequence of segments. The policy determines what set of + * primitive merge operations constitute an optimize. + * + * @param segmentInfos the total set of segments in the index + */ + void optimize(SegmentInfos segmentInfos, + IndexMerger merger) + throws CorruptIndexException, IOException; + + /** + * Release all resources for the policy. + */ + void close(); + + /** + * Returns an indication of whether a new (not from merge) segment + * should be compound or not. + */ + boolean useCompoundFile(SegmentInfos segments, SegmentInfo newSegment); + + /** + * Returns an indication of whether doc store files should be compound + * or not. + */ + boolean useCompoundDocStore(SegmentInfos segments); + + /* MergeException is a placeholder for other exceptions in derived + * classes. SerialMerge policies never throws it so if it catches + * one, so if they catch one, it turns into a runtime error, which it is. + */ + + class MergeException extends Exception { + } + + void checkedMerge(SegmentInfos segmentInfos, + IndexMerger merger) + throws CorruptIndexException, IOException, MergeException; + + void checkedOptimize(SegmentInfos segmentInfos, + IndexMerger merger) + throws CorruptIndexException, IOException, MergeException; + +} Property changes on: src/java/org/apache/lucene/index/MergePolicy.java ___________________________________________________________________ Name: svn:eol-style + native Index: src/java/org/apache/lucene/index/IndexWriter.java =================================================================== --- src/java/org/apache/lucene/index/IndexWriter.java (revision 567805) +++ src/java/org/apache/lucene/index/IndexWriter.java (working copy) @@ -161,7 +161,7 @@ * referenced by the "front" of the index). For this, IndexFileDeleter * keeps track of the last non commit checkpoint. */ -public class IndexWriter { +public class IndexWriter implements IndexMerger { /** * Default value for the write lock timeout (1,000). @@ -177,9 +177,10 @@ public static final String WRITE_LOCK_NAME = "write.lock"; /** - * Default value is 10. Change using {@link #setMergeFactor(int)}. + * @deprecated + * @see: LogDocMergePolicy.DEFAULT_MERGE_FACTOR */ - public final static int DEFAULT_MERGE_FACTOR = 10; + public final static int DEFAULT_MERGE_FACTOR = LogDocMergePolicy.DEFAULT_MERGE_FACTOR; /** * Default value is 10. Change using {@link #setMaxBufferedDocs(int)}. @@ -205,7 +206,8 @@ public final static int DEFAULT_MAX_BUFFERED_DELETE_TERMS = 1000; /** - * Default value is {@link Integer#MAX_VALUE}. Change using {@link #setMaxMergeDocs(int)}. 
+ * @deprecated + * @see: LogDocMergePolicy.DEFAULT_MAX_MERGE_DOCS */ public final static int DEFAULT_MAX_MERGE_DOCS = Integer.MAX_VALUE; @@ -239,7 +241,7 @@ private boolean localAutoCommit; // saved autoCommit during local transaction private boolean autoCommit = true; // false if we should commit only on close - SegmentInfos segmentInfos = new SegmentInfos(); // the segments + private SegmentInfos segmentInfos = new SegmentInfos(); // the segments private DocumentsWriter docWriter; private IndexFileDeleter deleter; @@ -255,14 +257,8 @@ // The key is delete term; the value is number of ram // segments the term applies to. private HashMap bufferedDeleteTerms = new HashMap(); - private int numBufferedDeleteTerms = 0; + /* private */ int numBufferedDeleteTerms = 0; - /** Use compound file setting. Defaults to true, minimizing the number of - * files used. Setting this to false may improve indexing performance, but - * may also cause file handle problems. - */ - private boolean useCompoundFile = true; - private boolean closeDir; private boolean closed; @@ -278,23 +274,22 @@ } } - /** Get the current setting of whether to use the compound file format. - * Note that this just returns the value you set with setUseCompoundFile(boolean) - * or the default. You cannot use this to query the status of an existing index. - * @see #setUseCompoundFile(boolean) + /** + * @deprecated + * @see LogDocMergePolicy#getUseCompoundFile() + * */ public boolean getUseCompoundFile() { - ensureOpen(); - return useCompoundFile; + return ((LogDocMergePolicy) getMergePolicy()).getUseCompoundFile(); } - /** Setting to turn on usage of a compound file. When on, multiple files - * for each segment are merged into a single file once the segment creation - * is finished. This is done regardless of what directory is in use. + /** + * @deprecated + * @see LogDocMergePolicy#setUseCompoundFile(boolean) + * */ public void setUseCompoundFile(boolean value) { - ensureOpen(); - useCompoundFile = value; + ((LogDocMergePolicy) getMergePolicy()).setUseCompoundFile(value); } /** Expert: Set the Similarity implementation used by this IndexWriter. @@ -652,16 +647,17 @@ } } - /** Determines the largest number of documents ever merged by addDocument(). - * Small values (e.g., less than 10,000) are best for interactive indexing, - * as this limits the length of pauses while indexing to a few seconds. - * Larger values are best for batched indexing and speedier searches. - * - *

The default value is {@link Integer#MAX_VALUE}. + private MergePolicy mergePolicy = new LogDocMergePolicy(); + + /** + * Set the merge policy used by this IndexWriter */ - public void setMaxMergeDocs(int maxMergeDocs) { + public void setMergePolicy(MergePolicy mp) { ensureOpen(); - this.maxMergeDocs = maxMergeDocs; + if ( mergePolicy != null && mergePolicy != mp ) { + mergePolicy.close(); + } + mergePolicy = mp; } /** @@ -669,12 +665,40 @@ * single segment. * @see #setMaxMergeDocs */ - public int getMaxMergeDocs() { + public MergePolicy getMergePolicy() { ensureOpen(); - return maxMergeDocs; + return mergePolicy; } /** + * Release the merge policy used by this code. + * + * Currently used by + * IndexModifier since it "owns" the merge policy and thus the + * policy shouldn't be closed when the writer is closed. + */ + public void releaseMergePolicy() { + ensureOpen(); + mergePolicy = null; + } + + /** + * @deprecated + * @see MergePolicy#setMaxMergeDocs(int) + */ + public void setMaxMergeDocs(int maxMergeDocs) { + ((LogDocMergePolicy) getMergePolicy()).setMaxMergeDocs(maxMergeDocs); + } + + /** + * @deprecated + * @see MergePolicy#getMaxMergeDocs() + */ + public int getMaxMergeDocs() { + return ((LogDocMergePolicy) getMergePolicy()).getMaxMergeDocs(); + } + + /** * The maximum number of terms that will be indexed for a single field in a * document. This limits the amount of memory required for indexing, so that * collections with very large files will not crash the indexing process by @@ -717,6 +741,7 @@ * @throws IllegalArgumentException if maxBufferedDocs is * smaller than 2 * @see #setRAMBufferSizeMB + * */ public void setMaxBufferedDocs(int maxBufferedDocs) { ensureOpen(); @@ -788,32 +813,20 @@ return maxBufferedDeleteTerms; } - /** Determines how often segment indices are merged by addDocument(). With - * smaller values, less RAM is used while indexing, and searches on - * unoptimized indices are faster, but indexing speed is slower. With larger - * values, more RAM is used during indexing, and while searches on unoptimized - * indices are slower, indexing is faster. Thus larger values (> 10) are best - * for batch index creation, and smaller values (< 10) for indices that are - * interactively maintained. - * - *

This must never be less than 2. The default value is 10. + /** + * @deprecated + * @see LogDocMergePolicy#setMergeFactor(int) */ public void setMergeFactor(int mergeFactor) { - ensureOpen(); - if (mergeFactor < 2) - throw new IllegalArgumentException("mergeFactor cannot be less than 2"); - this.mergeFactor = mergeFactor; + ((LogDocMergePolicy) getMergePolicy()).setMergeFactor(mergeFactor); } /** - * Returns the number of segments that are merged at once - * and also controls the total number of segments allowed - * to accumulate in the index. - * @see #setMergeFactor + * @deprecated + * @see LogDocMergePolicy#getMergeFactor */ public int getMergeFactor() { - ensureOpen(); - return mergeFactor; + return ((LogDocMergePolicy) getMergePolicy()).getMergeFactor(); } /** If non-null, this will be the default infoStream used @@ -839,7 +852,6 @@ */ public void setInfoStream(PrintStream infoStream) { ensureOpen(); - this.infoStream = infoStream; docWriter.setInfoStream(infoStream); deleter.setInfoStream(infoStream); } @@ -935,6 +947,11 @@ rollbackSegmentInfos = null; } + if (mergePolicy != null) { + mergePolicy.close(); + mergePolicy = null; + } + if (writeLock != null) { writeLock.release(); // release write lock writeLock = null; @@ -949,10 +966,12 @@ /** Tells the docWriter to close its currently open shared * doc stores (stored fields & vectors files). */ - private void flushDocStores() throws IOException { + private synchronized boolean flushDocStores() throws IOException { List files = docWriter.files(); + boolean useCompoundDocStore = false; + if (files.size() > 0) { String docStoreSegment; @@ -965,7 +984,9 @@ docWriter.abort(); } - if (useCompoundFile && docStoreSegment != null) { + useCompoundDocStore = mergePolicy.useCompoundDocStore(segmentInfos); + + if (useCompoundDocStore && docStoreSegment != null) { // Now build compound doc store file checkpoint(); @@ -1006,6 +1027,8 @@ deleter.checkpoint(segmentInfos, false); } } + + return useCompoundDocStore; } /** Release the write lock, if needed. */ @@ -1073,23 +1096,19 @@ *

This method periodically flushes pending documents * to the Directory (every {@link #setMaxBufferedDocs}), * and also periodically merges segments in the index - * (every {@link #setMergeFactor} flushes). When this + * by calling the configured {@link MergePolicy}. When this * occurs, the method will take more time to run (possibly * a long time if the index is large), and will require * free temporary space in the Directory to do the * merging.
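To illustrate the API this hunk refers to, here is a minimal sketch (not part of the patch) of wiring a LogDocMergePolicy into an IndexWriter through the new setMergePolicy() call; the helper class name and its placement in org.apache.lucene.index are assumptions, chosen only so the example can see the package-private MergePolicy types.

    package org.apache.lucene.index;

    import java.io.IOException;

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.store.Directory;

    // Hypothetical helper class, for illustration only.
    class MergePolicyConfigExample {

      static IndexWriter openWriter(Directory dir) throws IOException {
        IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true);

        // the writer still decides when to flush buffered documents ...
        writer.setMaxBufferedDocs(100);

        // ... but the merge policy now decides how flushed segments are merged
        LogDocMergePolicy policy = new LogDocMergePolicy();
        policy.setMergeFactor(10);      // merge 10 segments at a time
        policy.setMaxMergeDocs(50000);  // roughly: stop cascading merges near 50k docs
        writer.setMergePolicy(policy);  // replaces the default LogDocMergePolicy

        return writer;
      }
    }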

* - *

The amount of free space required when a merge is - * triggered is up to 1X the size of all segments being - * merged, when no readers/searchers are open against the - * index, and up to 2X the size of all segments being - * merged when readers/searchers are open against the - * index (see {@link #optimize()} for details). Most - * merges are small (merging the smallest segments - * together), but whenever a full merge occurs (all - * segments in the index, which is the worst case for - * temporary space usage) then the maximum free disk space - * required is the same as {@link #optimize}.

+ *

The amount of free space required when a merge is triggered is + * up to 1X the size of all segments being merged, when no + * readers/searchers are open against the index, and up to 2X the + * size of all segments being merged when readers/searchers are open + * against the index (see {@link #optimize()} for details). The + * sequence of primitive merge operations performed is governed by + * the merge policy. * *
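A related hedged sketch (again not part of the patch): the ConcurrentMergePolicy introduced above wraps a serial policy and hands sufficiently large primitive merges to background threads, while small merges still run inline. The helper class is hypothetical; the wrapping constructor and setMinConcurrentMerge() come from ConcurrentMergePolicy.java earlier in this patch.

    package org.apache.lucene.index;

    // Hypothetical helper class, for illustration only.
    class ConcurrentMergeExample {

      static void useConcurrentMerges(IndexWriter writer) {
        ConcurrentMergePolicy policy =
            new ConcurrentMergePolicy(new LogDocMergePolicy());

        // merges covering fewer than 1000 docs stay on the calling thread
        policy.setMinConcurrentMerge(1000);

        writer.setMergePolicy(policy);
      }
    }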

Note that each term in the document can be no longer * than 16383 characters, otherwise an @@ -1121,6 +1140,8 @@ try { success = docWriter.addDocument(doc, analyzer); } catch (IOException ioe) { + bufferedDeleteTerms.clear(); + numBufferedDeleteTerms = 0; deleter.refresh(); throw ioe; } @@ -1196,6 +1217,8 @@ try { success = docWriter.addDocument(doc, analyzer); } catch (IOException ioe) { + bufferedDeleteTerms.clear(); + numBufferedDeleteTerms = 0; deleter.refresh(); throw ioe; } @@ -1224,55 +1247,37 @@ } } - final String newSegmentName() { + final synchronized String newSegmentName() { return "_" + Integer.toString(segmentInfos.counter++, Character.MAX_RADIX); } - /** Determines how often segment indices are merged by addDocument(). With - * smaller values, less RAM is used while indexing, and searches on - * unoptimized indices are faster, but indexing speed is slower. With larger - * values, more RAM is used during indexing, and while searches on unoptimized - * indices are slower, indexing is faster. Thus larger values (> 10) are best - * for batch index creation, and smaller values (< 10) for indices that are - * interactively maintained. - * - *

This must never be less than 2. The default value is {@link #DEFAULT_MERGE_FACTOR}. - - */ - private int mergeFactor = DEFAULT_MERGE_FACTOR; - /** Determines amount of RAM usage by the buffered docs at * which point we trigger a flush to the index. */ private double ramBufferSize = DEFAULT_RAM_BUFFER_SIZE_MB*1024F*1024F; - /** Determines the largest number of documents ever merged by addDocument(). - * Small values (e.g., less than 10,000) are best for interactive indexing, - * as this limits the length of pauses while indexing to a few seconds. - * Larger values are best for batched indexing and speedier searches. - * - *

The default value is {@link #DEFAULT_MAX_MERGE_DOCS}. - - */ - private int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS; - /** If non-null, information about merges will be printed to this. */ private PrintStream infoStream = null; - private static PrintStream defaultInfoStream = null; - /** Merges all segments together into a single segment, - * optimizing an index for search. + /** + * Requests an "optimize" operation on an index, priming the index + * for the fastest available search. Traditionally this has meant + * merging all segments into a single segment as is done in the + * default merge policy, but individual merge policies may implement + * optimize in different ways. + * + * @see LogDocMergePolicy#optimize(SegmentInfos, IndexMerger) + * *

It is recommended that this method be called upon completion of indexing. In * environments with frequent updates, optimize is best done during low volume times, if at all. * *

*

See http://www.gossamer-threads.com/lists/lucene/java-dev/47895 for more discussion.

* - *

Note that this requires substantial temporary free + *

Note that this can require substantial temporary free * space in the Directory (see LUCENE-764 * for details):

@@ -1310,7 +1315,7 @@ *

The actual temporary usage could be much less than * these figures (it depends on many factors).

* - *

Once the optimize completes, the total size of the + *

In general, once the optimize completes, the total size of the * index will be less than the size of the starting index. * It could be quite a bit smaller (if there were many * pending deletes) or just slightly smaller.
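Because optimize() now simply delegates to the policy, a policy may implement optimize quite differently from LogDocMergePolicy's mergeFactor-at-a-time loop. The sketch below is an illustration only, assuming nothing beyond the MergePolicy, IndexMerger and MergeSpecification types added in this patch: its optimize() collapses the whole index in a single primitive merge, and its merge() never merges at all.

    package org.apache.lucene.index;

    import java.io.IOException;

    // Hypothetical policy, for illustration only.
    class SingleMergeOptimizePolicy implements MergePolicy {

      public void merge(SegmentInfos segmentInfos, IndexMerger merger)
          throws CorruptIndexException, IOException {
        // never merges on ordinary flushes
      }

      public void checkedMerge(SegmentInfos segmentInfos, IndexMerger merger)
          throws CorruptIndexException, IOException, MergeException {
        // nothing to do; merge() is a no-op as well
      }

      public void optimize(SegmentInfos segmentInfos, IndexMerger merger)
          throws CorruptIndexException, IOException {
        try {
          checkedOptimize(segmentInfos, merger);
        } catch (MergeException me) {
          throw new RuntimeException(me);
        }
      }

      public void checkedOptimize(SegmentInfos segmentInfos, IndexMerger merger)
          throws CorruptIndexException, IOException, MergeException {
        if (segmentInfos.size() > 1) {
          // one primitive merge covering every current segment
          MergeSpecification spec = new MergeSpecification();
          spec.segments = new SegmentInfos();
          spec.segments.addAll(segmentInfos);
          spec.useCompoundFile = true;
          merger.merge(spec);
        }
      }

      public void close() {
        // no resources held
      }

      public boolean useCompoundFile(SegmentInfos segments, SegmentInfo newSegment) {
        return true;
      }

      public boolean useCompoundDocStore(SegmentInfos segments) {
        return true;
      }
    }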

@@ -1330,18 +1335,21 @@ public synchronized void optimize() throws CorruptIndexException, IOException { ensureOpen(); flush(); - while (segmentInfos.size() > 1 || - (segmentInfos.size() == 1 && - (SegmentReader.hasDeletions(segmentInfos.info(0)) || - SegmentReader.hasSeparateNorms(segmentInfos.info(0)) || - segmentInfos.info(0).dir != directory || - (useCompoundFile && - !segmentInfos.info(0).getUseCompoundFile())))) { - int minSegment = segmentInfos.size() - mergeFactor; - mergeSegments(minSegment < 0 ? 0 : minSegment, segmentInfos.size()); - } + mergePolicy.optimize(segmentInfos, this); } + /** + * Requests a merge operation on an index. In memory data is first + * flushed to disk and then the merge policy called. + * + * Explict calls to merge() are usually not necessary. The most + * common case when they would be used are when merge policy + * parameters change or when concurrent merges are used. + */ + public synchronized void merge() throws CorruptIndexException, IOException { + flush(); + } + /* * Begin a transaction. During a transaction, any segment * merges that happen (or ram segments flushed) will not @@ -1355,7 +1363,7 @@ * within the transactions, so they must be flushed before the * transaction is started. */ - private void startTransaction() throws IOException { + synchronized void startTransaction() throws IOException { assert numBufferedDeleteTerms == 0 : "calling startTransaction with buffered delete terms not supported"; @@ -1378,7 +1386,7 @@ * Rolls back the transaction and restores state to where * we were at the start. */ - private void rollbackTransaction() throws IOException { + synchronized void rollbackTransaction() throws IOException { // First restore autoCommit in case we hit an exception below: autoCommit = localAutoCommit; @@ -1407,7 +1415,7 @@ * segments file and remove and pending deletions we have * accumulated during the transaction */ - private void commitTransaction() throws IOException { + synchronized void commitTransaction() throws IOException { // First restore autoCommit in case we hit an exception below: autoCommit = localAutoCommit; @@ -1455,6 +1463,10 @@ segmentInfos.clear(); segmentInfos.addAll(rollbackSegmentInfos); + // discard any buffered delete terms so they aren't applied in close() + bufferedDeleteTerms.clear(); + numBufferedDeleteTerms = 0; + docWriter.abort(); // Ask deleter to locate unreferenced files & remove @@ -1462,9 +1474,6 @@ deleter.checkpoint(segmentInfos, false); deleter.refresh(); - bufferedDeleteTerms.clear(); - numBufferedDeleteTerms = 0; - commitPending = false; docWriter.abort(); close(); @@ -1481,7 +1490,7 @@ * commit the change immediately. Else, we mark * commitPending. 
*/ - private void checkpoint() throws IOException { + private synchronized void checkpoint() throws IOException { if (autoCommit) { segmentInfos.write(directory); if (infoStream != null) @@ -1541,7 +1550,7 @@ throws CorruptIndexException, IOException { ensureOpen(); - optimize(); // start with zero or 1 seg + flush(); int start = segmentInfos.size(); @@ -1558,15 +1567,8 @@ } } - // merge newly added segments in log(n) passes - while (segmentInfos.size() > start+mergeFactor) { - for (int base = start; base < segmentInfos.size(); base++) { - int end = Math.min(segmentInfos.size(), base+mergeFactor); - if (end-base > 1) { - mergeSegments(base, end); - } - } - } + mergePolicy.optimize(segmentInfos, this); + success = true; } finally { if (success) { @@ -1575,8 +1577,7 @@ rollbackTransaction(); } } - - optimize(); // final cleanup + } /** @@ -1598,40 +1599,10 @@ */ public synchronized void addIndexesNoOptimize(Directory[] dirs) throws CorruptIndexException, IOException { - // Adding indexes can be viewed as adding a sequence of segments S to - // a sequence of segments T. Segments in T follow the invariants but - // segments in S may not since they could come from multiple indexes. - // Here is the merge algorithm for addIndexesNoOptimize(): - // - // 1 Flush ram. - // 2 Consider a combined sequence with segments from T followed - // by segments from S (same as current addIndexes(Directory[])). - // 3 Assume the highest level for segments in S is h. Call - // maybeMergeSegments(), but instead of starting w/ lowerBound = -1 - // and upperBound = maxBufferedDocs, start w/ lowerBound = -1 and - // upperBound = upperBound of level h. After this, the invariants - // are guaranteed except for the last < M segments whose levels <= h. - // 4 If the invariants hold for the last < M segments whose levels <= h, - // if some of those < M segments are from S (not merged in step 3), - // properly copy them over*, otherwise done. - // Otherwise, simply merge those segments. If the merge results in - // a segment of level <= h, done. Otherwise, it's of level h+1 and call - // maybeMergeSegments() starting w/ upperBound = upperBound of level h+1. - // - // * Ideally, we want to simply copy a segment. However, directory does - // not support copy yet. In addition, source may use compound file or not - // and target may use compound file or not. So we use mergeSegments() to - // copy a segment, which may cause doc count to change because deleted - // docs are garbage collected. 
- // 1 flush ram - ensureOpen(); flush(); - // 2 copy segment infos and find the highest level from dirs - int startUpperBound = docWriter.getMaxBufferedDocs(); - /* new merge policy if (startUpperBound == 0) startUpperBound = 10; @@ -1654,64 +1625,13 @@ for (int j = 0; j < sis.size(); j++) { SegmentInfo info = sis.info(j); segmentInfos.addElement(info); // add each info - - while (startUpperBound < info.docCount) { - startUpperBound *= mergeFactor; // find the highest level from dirs - if (startUpperBound > maxMergeDocs) { - // upper bound cannot exceed maxMergeDocs - throw new IllegalArgumentException("Upper bound cannot exceed maxMergeDocs"); - } - } } } - // 3 maybe merge segments starting from the highest level from dirs - maybeMergeSegments(startUpperBound); + mergePolicy.merge(segmentInfos, this); - // get the tail segments whose levels <= h - int segmentCount = segmentInfos.size(); - int numTailSegments = 0; - while (numTailSegments < segmentCount - && startUpperBound >= segmentInfos.info(segmentCount - 1 - numTailSegments).docCount) { - numTailSegments++; - } - if (numTailSegments == 0) { - success = true; - return; - } - - // 4 make sure invariants hold for the tail segments whose levels <= h - if (checkNonDecreasingLevels(segmentCount - numTailSegments)) { - // identify the segments from S to be copied (not merged in 3) - int numSegmentsToCopy = 0; - while (numSegmentsToCopy < segmentCount - && directory != segmentInfos.info(segmentCount - 1 - numSegmentsToCopy).dir) { - numSegmentsToCopy++; - } - if (numSegmentsToCopy == 0) { - success = true; - return; - } - - // copy those segments from S - for (int i = segmentCount - numSegmentsToCopy; i < segmentCount; i++) { - mergeSegments(i, i + 1); - } - if (checkNonDecreasingLevels(segmentCount - numSegmentsToCopy)) { - success = true; - return; - } - } - - // invariants do not hold, simply merge those segments - mergeSegments(segmentCount - numTailSegments, segmentCount); - - // maybe merge segments again if necessary - if (segmentInfos.info(segmentInfos.size() - 1).docCount > startUpperBound) { - maybeMergeSegments(startUpperBound * mergeFactor); - } - success = true; + } finally { if (success) { commitTransaction(); @@ -1721,14 +1641,9 @@ } } - /** Merges the provided indexes into this index. - *

After this completes, the index is optimized.

+ /** + *

See {@link #addIndexes(Directory[])} *

The provided IndexReaders are not closed.

- - *

See {@link #addIndexes(Directory[])} for - * details on transactional semantics, temporary free - * space required in the Directory, and non-CFS segments - * on an Exception.

* @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ @@ -1736,108 +1651,16 @@ throws CorruptIndexException, IOException { ensureOpen(); - optimize(); // start with zero or 1 seg - final String mergedName = newSegmentName(); - SegmentMerger merger = new SegmentMerger(this, mergedName); - - SegmentInfo info; - - IndexReader sReader = null; - try { - if (segmentInfos.size() == 1){ // add existing index, if any - sReader = SegmentReader.get(segmentInfos.info(0)); - merger.add(sReader); - } - - for (int i = 0; i < readers.length; i++) // add new indexes - merger.add(readers[i]); - - boolean success = false; - - startTransaction(); - - try { - int docCount = merger.merge(); // merge 'em - - if(sReader != null) { - sReader.close(); - sReader = null; - } - - segmentInfos.setSize(0); // pop old infos & add new - info = new SegmentInfo(mergedName, docCount, directory, false, true, - -1, null, false); - segmentInfos.addElement(info); - - success = true; - - } finally { - if (!success) { - rollbackTransaction(); - } else { - commitTransaction(); - } - } - } finally { - if (sReader != null) { - sReader.close(); - } + Directory[] dirs = new Directory[ readers.length ]; + for( int i = 0; i < dirs.length; i++ ) { + dirs[i] = readers[i].directory(); } - - if (useCompoundFile) { - boolean success = false; + addIndexes( dirs ); - startTransaction(); - - try { - merger.createCompoundFile(mergedName + ".cfs"); - info.setUseCompoundFile(true); - } finally { - if (!success) { - rollbackTransaction(); - } else { - commitTransaction(); - } - } - } } - // Overview of merge policy: - // - // A flush is triggered either by close() or by the number of ram segments - // reaching maxBufferedDocs. After a disk segment is created by the flush, - // further merges may be triggered. - // - // LowerBound and upperBound set the limits on the doc count of a segment - // which may be merged. Initially, lowerBound is set to 0 and upperBound - // to maxBufferedDocs. Starting from the rightmost* segment whose doc count - // > lowerBound and <= upperBound, count the number of consecutive segments - // whose doc count <= upperBound. - // - // Case 1: number of worthy segments < mergeFactor, no merge, done. - // Case 2: number of worthy segments == mergeFactor, merge these segments. - // If the doc count of the merged segment <= upperBound, done. - // Otherwise, set lowerBound to upperBound, and multiply upperBound - // by mergeFactor, go through the process again. - // Case 3: number of worthy segments > mergeFactor (in the case mergeFactor - // M changes), merge the leftmost* M segments. If the doc count of - // the merged segment <= upperBound, consider the merged segment for - // further merges on this same level. Merge the now leftmost* M - // segments, and so on, until number of worthy segments < mergeFactor. - // If the doc count of all the merged segments <= upperBound, done. - // Otherwise, set lowerBound to upperBound, and multiply upperBound - // by mergeFactor, go through the process again. - // Note that case 2 can be considerd as a special case of case 3. - // - // This merge policy guarantees two invariants if M does not change and - // segment doc count is not reaching maxMergeDocs: - // B for maxBufferedDocs, f(n) defined as ceil(log_M(ceil(n/B))) - // 1: If i (left*) and i+1 (right*) are two consecutive segments of doc - // counts x and y, then f(x) >= f(y). - // 2: The number of committed segments on the same level (f(n)) <= M. 
- // This is called after pending added and deleted // documents have been flushed to the Directory but before // the change is committed (new segments_N file written). @@ -1910,11 +1733,12 @@ // apply to more than just the last flushed segment boolean flushDeletes = bufferedDeleteTerms.size() > 0; - if (infoStream != null) + if (true && infoStream != null) infoStream.println(" flush: flushDocs=" + flushDocs + " flushDeletes=" + flushDeletes + " flushDocStores=" + flushDocStores + - " numDocs=" + numDocs); + " numDocs=" + numDocs + + " numBufDelTerms=" + numBufferedDeleteTerms); int docStoreOffset = docWriter.getDocStoreOffset(); boolean docStoreIsCompoundFile = false; @@ -1927,9 +1751,8 @@ if (infoStream != null) infoStream.println(" flush shared docStore segment " + docStoreSegment); - flushDocStores(); + docStoreIsCompoundFile = flushDocStores(); flushDocStores = false; - docStoreIsCompoundFile = useCompoundFile; } String segment = docWriter.getSegment(); @@ -1992,6 +1815,8 @@ segmentInfos.clear(); segmentInfos.addAll(rollback); + // System.err.println( "restore buf deletes " + saveNumBufferedDeleteTerms + " " + + // numBufferedDeleteTerms ); if (saveBufferedDeleteTerms != null) { numBufferedDeleteTerms = saveNumBufferedDeleteTerms; bufferedDeleteTerms = saveBufferedDeleteTerms; @@ -2013,7 +1838,8 @@ deleter.checkpoint(segmentInfos, autoCommit); - if (flushDocs && useCompoundFile) { + if (flushDocs && mergePolicy.useCompoundFile(segmentInfos, + newSegment)) { success = false; try { docWriter.createCompoundFile(segment); @@ -2030,14 +1856,15 @@ deleter.checkpoint(segmentInfos, autoCommit); } - /* new merge policy - if (0 == docWriter.getMaxBufferedDocs()) - maybeMergeSegments(mergeFactor * numDocs / 2); - else - maybeMergeSegments(docWriter.getMaxBufferedDocs()); - */ - if (triggerMerge) - maybeMergeSegments(docWriter.getMaxBufferedDocs()); + if (triggerMerge) { + /* new merge policy + if (0 == docWriter.getMaxBufferedDocs()) + mergePolicy.merge(segmentInfos,mergeFactor * numDocs / 2); + else + mergePolicy.merge(segmentInfos,docWriter.getMaxBufferedDocs()); + */ + mergePolicy.merge(segmentInfos, this); + } } } finally { docWriter.clearFlushPending(); @@ -2061,79 +1888,60 @@ return docWriter.getNumDocsInRAM(); } - /** Incremental segment merger. 
*/ - private final void maybeMergeSegments(int startUpperBound) throws CorruptIndexException, IOException { - long lowerBound = -1; - long upperBound = startUpperBound; - - /* new merge policy - if (upperBound == 0) upperBound = 10; - */ - - while (upperBound < maxMergeDocs) { - int minSegment = segmentInfos.size(); - int maxSegment = -1; - - // find merge-worthy segments - while (--minSegment >= 0) { - SegmentInfo si = segmentInfos.info(minSegment); - - if (maxSegment == -1 && si.docCount > lowerBound && si.docCount <= upperBound) { - // start from the rightmost* segment whose doc count is in bounds - maxSegment = minSegment; - } else if (si.docCount > upperBound) { - // until the segment whose doc count exceeds upperBound - break; - } + synchronized int find( SegmentInfos segmentInfos, SegmentInfo segment ) { + for(int i=0; i < segmentInfos.size(); i++) { + if (segmentInfos.info(i).name.equals( segment.name)) { + return i; } + } + return -1; + } - minSegment++; - maxSegment++; - int numSegments = maxSegment - minSegment; + /* FIXME if we want to support non-contiguous segment merges */ + synchronized void replace(MergePolicy.MergeSpecification spec, SegmentInfo info) { - if (numSegments < mergeFactor) { - break; - } else { - boolean exceedsUpperLimit = false; + int first = find(segmentInfos, spec.segments.info(0)); + int last = find(segmentInfos, spec.segments.info(spec.segments.size() - 1)); - // number of merge-worthy segments may exceed mergeFactor when - // mergeFactor and/or maxBufferedDocs change(s) - while (numSegments >= mergeFactor) { - // merge the leftmost* mergeFactor segments + last++; - int docCount = mergeSegments(minSegment, minSegment + mergeFactor); - numSegments -= mergeFactor; + if (!(first >= 0) || !(last >= 1) || !(last - first == spec.segments.size())) { + throw new RuntimeException("bad replace spec"); + } - if (docCount > upperBound) { - // continue to merge the rest of the worthy segments on this level - minSegment++; - exceedsUpperLimit = true; - } else { - // if the merged segment does not exceed upperBound, consider - // this segment for further merges on this same level - numSegments++; - } - } + // nocommit + // System.err.println( "before ++" ); + for( int i=0; i < segmentInfos.size(); i++ ) { + // System.err.println( segmentInfos.info(i).name ); + } - if (!exceedsUpperLimit) { - // if none of the merged segments exceed upperBound, done - break; - } - } + segmentInfos.subList(first, last).clear(); + segmentInfos.add(first, info); - lowerBound = upperBound; - upperBound *= mergeFactor; + // nocommit + // System.err.println( "after ++" ); + for( int i=0; i < segmentInfos.size(); i++ ) { + // System.err.println( segmentInfos.info(i).name ); } + } /** - * Merges the named range of segments, replacing them in the stack with a + * Merges the indicated segments, replacing them in the stack with a * single segment. */ - private final int mergeSegments(int minSegment, int end) + /* NB: NOT SYNCHRONIZED: cannot touch segmentInfos, among other things, + * w/o explicit synchronization. 
+ */ + public final int merge(MergePolicy.MergeSpecification spec) throws CorruptIndexException, IOException { + SegmentInfos sourceSegments = spec.segments; + + int minSegment = 0; + int end = sourceSegments.size(); + final String mergedName = newSegmentName(); SegmentMerger merger = null; @@ -2160,7 +1968,7 @@ // Test each segment to be merged for (int i = minSegment; i < end; i++) { - SegmentInfo si = segmentInfos.info(i); + SegmentInfo si = sourceSegments.info(i); // If it has deletions we must merge the doc stores if (si.hasDeletions()) @@ -2211,7 +2019,7 @@ docStoreSegment = null; docStoreIsCompoundFile = false; } else { - SegmentInfo si = segmentInfos.info(minSegment); + SegmentInfo si = sourceSegments.info(minSegment); docStoreOffset = si.getDocStoreOffset(); docStoreSegment = si.getDocStoreSegment(); docStoreIsCompoundFile = si.getDocStoreIsCompoundFile(); @@ -2227,7 +2035,7 @@ merger = new SegmentMerger(this, mergedName); for (int i = minSegment; i < end; i++) { - SegmentInfo si = segmentInfos.info(i); + SegmentInfo si = sourceSegments.info(i); if (infoStream != null) infoStream.print(" " + si.name + " (" + si.docCount + " docs)"); IndexReader reader = SegmentReader.get(si, MERGE_READ_BUFFER_SIZE, mergeDocStores); // no need to set deleter (yet) @@ -2253,13 +2061,10 @@ docStoreSegment, docStoreIsCompoundFile); - rollback = (SegmentInfos) segmentInfos.clone(); + rollback = (SegmentInfos) sourceSegments.clone(); - for (int i = end-1; i > minSegment; i--) // remove old infos & add new - segmentInfos.remove(i); + replace(spec, newSegment); - segmentInfos.set(minSegment, newSegment); - checkpoint(); success = true; @@ -2271,8 +2076,8 @@ // instances, but keep original SegmentInfos // instance (so we don't try to write again the // same segments_N file -- write once): - segmentInfos.clear(); - segmentInfos.addAll(rollback); + sourceSegments.clear(); + sourceSegments.addAll(rollback); } // Delete any partially created and now unreferenced files: @@ -2287,8 +2092,12 @@ } // Give deleter a chance to remove files now. - deleter.checkpoint(segmentInfos, autoCommit); + synchronized(this) { + deleter.checkpoint(segmentInfos, autoCommit); + } + boolean useCompoundFile = spec.useCompoundFile; + if (useCompoundFile) { boolean success = false; @@ -2315,6 +2124,7 @@ return mergedDocCount; } + // Called during flush to apply any buffered deletes. If // flushedNewSegment is true then a new segment was just // created and flushed from the ram segments, so we will @@ -2337,6 +2147,7 @@ // Apply delete terms to the segment just flushed from ram // apply appropriately so that a delete term is only applied to // the documents buffered before it, not those buffered after it. + // System.err.println("apply deletes to new"); // nocommit applyDeletesSelectively(bufferedDeleteTerms, reader); } finally { if (reader != null) { @@ -2361,6 +2172,7 @@ // Apply delete terms to disk segments // except the one just flushed from ram. + // System.err.println("apply deletes to " + i); // nocommit applyDeletes(bufferedDeleteTerms, reader); } finally { if (reader != null) { @@ -2374,7 +2186,6 @@ } // Clean up bufferedDeleteTerms. - // Rollbacks of buffered deletes are based on restoring the old // map, so don't modify this one. 
Rare enough that the gc // overhead is almost certainly lower than the alternate, which @@ -2385,29 +2196,6 @@ } } - private final boolean checkNonDecreasingLevels(int start) { - int lowerBound = -1; - int upperBound = docWriter.getMaxBufferedDocs(); - - /* new merge policy - if (upperBound == 0) - upperBound = 10; - */ - - for (int i = segmentInfos.size() - 1; i >= start; i--) { - int docCount = segmentInfos.info(i).docCount; - if (docCount <= lowerBound) { - return false; - } - - while (docCount > upperBound) { - lowerBound = upperBound; - upperBound *= mergeFactor; - } - } - return true; - } - // For test purposes. final synchronized int getBufferedDeleteTermsSize() { return bufferedDeleteTerms.size(); @@ -2442,6 +2230,7 @@ private void bufferDeleteTerm(Term term) { Num num = (Num) bufferedDeleteTerms.get(term); int numDoc = docWriter.getNumDocsInRAM(); + // System.err.println( "buf delete " + term ); // nocommit if (num == null) { bufferedDeleteTerms.put(term, new Num(numDoc)); } else { @@ -2484,7 +2273,47 @@ Iterator iter = deleteTerms.entrySet().iterator(); while (iter.hasNext()) { Entry entry = (Entry) iter.next(); + // System.err.println("apply " + entry.getKey()); // nocommit reader.deleteDocuments((Term) entry.getKey()); } } + + // utitliy routines for tests + SegmentInfo newestSegment() { + return segmentInfos.info(segmentInfos.size()-1); + } + + void checkpoint(Directory dir) throws IOException { + segmentInfos.write(dir); + } + + public String segString() { + StringBuffer buffer = new StringBuffer(); + for(int i = 0; i < segmentInfos.size(); i++) { + if (i > 0) { + buffer.append(' '); + } + + SegmentInfo info = segmentInfos.info(i); + + try { + if (info.getUseCompoundFile()) { + buffer.append('c'); + } else { + buffer.append('C'); + } + } catch (Exception e) { + } + + if (info.dir != getDirectory()) { + buffer.append('x'); + } + + buffer.append(info.docCount); + + } + + return buffer.toString(); + } + } Index: src/java/org/apache/lucene/index/IndexFileDeleter.java =================================================================== --- src/java/org/apache/lucene/index/IndexFileDeleter.java (revision 567805) +++ src/java/org/apache/lucene/index/IndexFileDeleter.java (working copy) @@ -122,6 +122,7 @@ throws CorruptIndexException, IOException { this.docWriter = docWriter; + infoStream = null; // nocommit this.infoStream = infoStream; if (infoStream != null)