Index: src/test/org/apache/lucene/index/TestLogarithmicMergePolicy.java =================================================================== --- src/test/org/apache/lucene/index/TestLogarithmicMergePolicy.java (revision 0) +++ src/test/org/apache/lucene/index/TestLogarithmicMergePolicy.java (revision 0) @@ -0,0 +1,197 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +public class TestLogarithmicMergePolicy extends TestCase { + + MockMerger merger; + LogarithmicMergePolicy policy; + + void set( String sizes ) { + merger = new MockMerger ( sizes ); + policy = new LogarithmicMergePolicy( merger ); + policy.setMaxMergeDocs(100000); + } + + void merge( String before, String after, int mergeFactor, int minMergeDocs ) + throws CorruptIndexException, IOException { + set( before ); + policy.setMinMergeDocs( minMergeDocs ); + policy.setMergeFactor( mergeFactor ); + policy.merge( merger.segmentInfos ); + MockMerger other = new MockMerger( after ); + // System.err.println( "< " + merger.toString() ); + // System.err.println( "> " + other.toString() ); + assertEquals( other.toString(), merger.toString() ); + } + + void merge( String before, String after, int mergeFactor ) + throws CorruptIndexException, IOException { + merge( before, after, mergeFactor, LogarithmicMergePolicy.DEFAULT_MAX_BUFFERED_DOCS ); + + } + + void merge( String before, String after ) + throws CorruptIndexException, IOException { + merge( before, + after, + LogarithmicMergePolicy.DEFAULT_MERGE_FACTOR, + LogarithmicMergePolicy.DEFAULT_MAX_BUFFERED_DOCS ); + } + + public void testOneMerge() + throws CorruptIndexException, IOException { + merge( "1000 10 0 10", "1000 10 10", 4, 4 ); + } + + public void testConsistentMerge() + throws CorruptIndexException, IOException { + + MockMerger other; + + merge( "0 10", "10" ); + merge( "1000 10 0 10", "1000 10 10", 4, 4 ); + merge( "1000 0 0 10 0 0 10", "1000 10 10", 4 ); + merge( "1000 100 10 1 1 1 1 1 1 1 1 1 1 1 1 1 1", "1000 100 10 10 1 1 1 1" ); + merge( "9 9 9 9 9 9 9 9 9 9","90" ); + merge( "10 10 10 10 10 10 10 10 10 10", "100" ); + merge( "100 100 100 100 100 100 100 100 100 100", "1000" ); + merge( "1000 10 x10 x10 x10", "1000 10 10 10 10" ); + merge( "1000 9 x9 x9 x9", "1000 9 9 9 9" ); + merge( "1000 10 x10 x10 x10", "1000 40", 4 ); + merge( "1000 9 x9 x9 x9", "1000 36", 4 ); + merge( "1000 0 10 0 10", "1000 20", 4 ); + + } + + public void testInconsistentMerge() + throws CorruptIndexException, IOException { + + merge( "1000 100 10 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1000", "1117 1007" ); + merge( "1000 100 10 1 1 1 1 1 1 1 1 1 1 1 1 1 1 10", "1000 100 19 15" ); + + } + + public void testChangeMergeFactor() + throws CorruptIndexException, IOException { + + MockMerger 
other; + + set( "10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10" ); + policy.setMergeFactor( 5 ); + policy.merge( merger.segmentInfos ); + other = new MockMerger( "250 10" ); + // System.err.println( merger ); + // System.err.println( other ); + assertEquals( other.toString(), merger.toString() ); + + set( "0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 10" ); + policy.setMergeFactor( 5 ); + policy.merge( merger.segmentInfos ); + other = new MockMerger( "10" ); + assertEquals( other.toString(), merger.toString() ); + } + + public void testOptimize() + throws CorruptIndexException, IOException { + + MockMerger other; + + set ( "1000 100 10 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1000" ); + + other = new MockMerger( "2124" ); + policy.optimize( merger.segmentInfos ); + assertEquals( other.toString(), merger.toString() ); + + set ( "x1000" ); + + other = new MockMerger( "1000" ); + policy.optimize( merger.segmentInfos ); + assertEquals( other.toString(), merger.toString() ); + + set ( "1000 100 10 1" ); + + other = new MockMerger( "1111" ); + policy.optimize( merger.segmentInfos ); + assertEquals( other.toString(), merger.toString() ); + + } + + public void testDirs() + throws CorruptIndexException, IOException { + + MockMerger other; + + set ( "100 10 1" ); + + other = new MockMerger( "100 10 1" ); + assertEquals( other.toString(), merger.toString() ); + + other = new MockMerger( "100 x10 1" ); + assertNotEquals( other.toString(), merger.toString() ); + + set ( "100 x10 1" ); + policy.merge( merger.segmentInfos ); + + other = new MockMerger( "100 10 1" ); + assertEquals( other.toString(), merger.toString() ); + + } + + public void testInconsistent() { + + set( "1001 1000 1001 1000 101 11 1" ); + assertTrue( policy.isConsistent( merger.segmentInfos ) ); + + set( "1001 1000 1001 101 1000 101 11 1" ); + assertFalse( policy.isConsistent( merger.segmentInfos ) ); + + set( "1001 11 101 1" ); + assertFalse( policy.isConsistent( merger.segmentInfos ) ); + + set( "1001 101 11" ); + assertTrue( policy.isConsistent( merger.segmentInfos ) ); + + set( "100001 10001 1001 101 11" ); + try { + policy.isConsistent( merger.segmentInfos ); + fail(); + } catch ( IllegalArgumentException e ) { + } + + } + + public void testLowestConsistentBound() { + + set( "101 101 1001 101 1" ); + assertTrue( policy.lowestConsistentBound( merger.segmentInfos ) == 10000 ); + + set( "10000 1000 100 10 1 100 10 1" ); + assertTrue( policy.lowestConsistentBound( merger.segmentInfos ) == 1000 ); + + set( "1000 1000 100 10 1 1000" ); + assertTrue( policy.lowestConsistentBound( merger.segmentInfos ) == 10000 ); + + set( "1000 100 10 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1000" ); + assertTrue( policy.lowestConsistentBound( merger.segmentInfos ) == 10000 ); + + } + +} Property changes on: src/test/org/apache/lucene/index/TestLogarithmicMergePolicy.java ___________________________________________________________________ Name: svn:eol-style + native
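[Editor's note] The tests above (and the concurrent tests below) describe an index as a compact segment string, parsed by MockMerger later in this patch. A sketch of the notation as that parser implements it (defaults and rendering inferred from MockMerger, not normative):

    // each whitespace-separated token is one segment's docCount, optionally
    // prefixed by 'c' (compound file, the default), 'C' (not compound), or
    // 'x'/'y'/'z' (segment owned by a foreign MockDirectory of that name)
    MockMerger merger = new MockMerger( "1000 x10 C100" );
    // yields three SegmentInfos: name0 = 1000 docs (compound, local dir),
    // name1 = 10 docs (compound, dir "x"), name2 = 100 docs (not compound);
    // merger.toString() renders this as "[0]c1000 [1]cx10 [2]C100"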
Index: src/test/org/apache/lucene/index/TestIndexWriterMergePolicy.java =================================================================== --- src/test/org/apache/lucene/index/TestIndexWriterMergePolicy.java (revision 521898) +++ src/test/org/apache/lucene/index/TestIndexWriterMergePolicy.java (working copy) @@ -157,10 +157,18 @@ writer.setMaxBufferedDocs(10); writer.setMergeFactor(5); + + // System.err.println( "before adds" ); + // merge factor is changed, so check invariants after all adds for (int i = 0; i < 10; i++) { addDoc(writer); } + + // System.err.println( "after adds" ); + + // System.err.println( segString( writer ) ); + checkInvariants(writer); assertEquals(10, writer.docCount()); @@ -173,6 +181,41 @@ writer.addDocument(doc); } + public String segString( IndexWriter writer ) { + + SegmentInfos segmentInfos = writer.segmentInfos; + + StringBuffer buffer = new StringBuffer(); + + for( int i = 0; i < segmentInfos.size(); i++ ) { + + if ( i > 0 ) { + buffer.append( ' ' ); + } + + SegmentInfo info = segmentInfos.info( i ); + + try { + if ( info.getUseCompoundFile() ) { + buffer.append( 'c' ); + } else { + buffer.append( 'C' ); + } + } catch (Exception e) { // ignore: compound-file flag unavailable + } + + if ( info.dir != writer.getDirectory() ) { + buffer.append( 'x' ); + } + + buffer.append( info.docCount ); + + } + + return buffer.toString(); + + } + private void checkInvariants(IndexWriter writer) throws IOException { int maxBufferedDocs = writer.getMaxBufferedDocs(); int mergeFactor = writer.getMergeFactor(); @@ -181,30 +224,37 @@ int ramSegmentCount = writer.getRamSegmentCount(); assertTrue(ramSegmentCount < maxBufferedDocs); - int lowerBound = -1; - int upperBound = maxBufferedDocs; + int currentBound = 0; + int nextBound = maxBufferedDocs; int numSegments = 0; int segmentCount = writer.getSegmentCount(); for (int i = segmentCount - 1; i >= 0; i--) { int docCount = writer.getDocCount(i); - assertTrue(docCount > lowerBound); - if (docCount <= upperBound) { + if ( docCount < currentBound ) { + System.err.println( docCount + " " + currentBound ); + System.err.println( i ); + System.err.println( segString( writer ) ); + } + + assertTrue(docCount >= currentBound); + + if (docCount < nextBound) { numSegments++; } else { - if (upperBound * mergeFactor <= maxMergeDocs) { + if (nextBound * mergeFactor < maxMergeDocs) { assertTrue(numSegments < mergeFactor); } do { - lowerBound = upperBound; - upperBound *= mergeFactor; + currentBound = nextBound; + nextBound *= mergeFactor; - } while (docCount > upperBound); + } while (docCount >= nextBound); numSegments = 1; } } - if (upperBound * mergeFactor <= maxMergeDocs) { + if (nextBound * mergeFactor <= maxMergeDocs) { assertTrue(numSegments < mergeFactor); } Index: src/test/org/apache/lucene/index/TestConcurrentLogarithmicMergePolicy.java =================================================================== --- src/test/org/apache/lucene/index/TestConcurrentLogarithmicMergePolicy.java (revision 0) +++ src/test/org/apache/lucene/index/TestConcurrentLogarithmicMergePolicy.java (revision 0) @@ -0,0 +1,259 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +import java.io.IOException; + +public class TestConcurrentLogarithmicMergePolicy extends TestCase { + + MockConcurrentMerger merger; + ConcurrentLogarithmicMergePolicy policy; + + void set( String sizes ) { + merger = new MockConcurrentMerger ( sizes ); + policy = new ConcurrentLogarithmicMergePolicy( merger ); + merger.setMergePolicy( policy ); + policy.setMaxMergeDocs(100000); + } + + void merge( String before, String after, int mergeFactor, int minMergeDocs ) + throws CorruptIndexException, IOException { + set( before ); + policy.setMinMergeDocs( minMergeDocs ); + policy.setMergeFactor( mergeFactor ); + policy.merge( merger.segmentInfos ); + MockConcurrentMerger other = new MockConcurrentMerger( after ); + // System.err.println( "> " + other.toString() ); + // System.err.println( "< " + merger.toString() ); + policy.mergeWait(); + assertEquals( other.toString(), merger.toString() ); + } + + void merge( String before, String after, int mergeFactor ) + throws CorruptIndexException, IOException { + merge( before, after, mergeFactor, LogarithmicMergePolicy.DEFAULT_MAX_BUFFERED_DOCS ); + + } + + void merge( String before, String after ) + throws CorruptIndexException, IOException { + merge( before, + after, + LogarithmicMergePolicy.DEFAULT_MERGE_FACTOR, + LogarithmicMergePolicy.DEFAULT_MAX_BUFFERED_DOCS ); + } + + public void _testManyLevels() + throws CorruptIndexException, IOException { + + set ("100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100"); + policy.setMergeFactor( 2 ); + policy.merge( merger.segmentInfos ); + policy.mergeWait(); + System.err.println( "< " + merger.toString() ); + assertEquals( merger.toString(), "[0]c4000" ); + + } + + public void _testMultipleThreads() + throws CorruptIndexException, IOException { + + set ("1000 1000"); + policy.setMergeFactor( 2 ); + policy.merge( merger.segmentInfos ); + MockMerger other = new MockMerger( "1000 1000" ); + merger.add( other ); + policy.merge( merger.segmentInfos ); + policy.mergeWait(); + assertEquals( merger.toString(), "[0]c4000" ); + + } + + public void testClose() + throws CorruptIndexException, IOException { + + set ("1000 1000"); + policy.setMergeFactor( 2 ); + policy.merge( merger.segmentInfos ); + MockMerger other = new MockMerger( "1000 1000" ); + merger.add( other ); + policy.merge( merger.segmentInfos ); + policy.close(); + assertEquals( merger.toString(), "[0]c4000" ); + + } + + public void testParallelMerges() + throws CorruptIndexException, IOException { + + set ("1000 1000 1000 1000"); + policy.setMergeFactor( 2 ); + policy.merge( merger.segmentInfos ); + MockMerger other = new MockMerger( "1000 1000" ); + merger.add( other ); + policy.merge( merger.segmentInfos ); + policy.mergeWait(); + assertEquals( "[0]c4000 [1]c2000", merger.toString() ); + + } + + public void testConcurrentMerge() + throws CorruptIndexException, IOException { + + merge( "1 1 1 1 1 1 1 1 1 1", "10" ); + merge( "100 100 100 100 100 100 100 100 100 100", "1000" ); + + } + + public void testConsistentMerge() + throws CorruptIndexException, IOException { + + MockConcurrentMerger other; + + merge( "0 10", "10" ); + merge( "1000 10 0 10", "1000 10 10", 4, 4 ); + merge( "1000 0 0 10 0 0 10", "1000 10 10", 4 ); + merge( "1000 100 
10 1 1 1 1 1 1 1 1 1 1 1 1 1 1", "1000 100 10 10 1 1 1 1" ); + merge( "9 9 9 9 9 9 9 9 9 9","90" ); + merge( "10 10 10 10 10 10 10 10 10 10", "100" ); + merge( "100 100 100 100 100 100 100 100 100 100", "1000" ); + merge( "1000 10 x10 x10 x10", "1000 10 10 10 10" ); + merge( "1000 9 x9 x9 x9", "1000 9 9 9 9" ); + merge( "1000 10 x10 x10 x10", "1000 40", 4 ); + merge( "1000 9 x9 x9 x9", "1000 36", 4 ); + merge( "1000 0 10 0 10", "1000 20", 4 ); + + } + + public void testInconsistentMerge() + throws CorruptIndexException, IOException { + + merge( "1000 100 10 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1000", "1117 1007" ); + merge( "1000 100 10 1 1 1 1 1 1 1 1 1 1 1 1 1 1 10", "1000 100 19 15" ); + + } + + public void testChangeMergeFactor() + throws CorruptIndexException, IOException { + + MockConcurrentMerger other; + + set( "10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10" ); + policy.setMergeFactor( 5 ); + policy.merge( merger.segmentInfos ); + other = new MockConcurrentMerger( "250 10" ); + policy.mergeWait(); + assertEquals( other.toString(), merger.toString() ); + + set( "0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 10" ); + policy.setMergeFactor( 5 ); + policy.merge( merger.segmentInfos ); + other = new MockConcurrentMerger( "10" ); + policy.mergeWait(); + assertEquals( other.toString(), merger.toString() ); + } + + public void testOptimize() + throws CorruptIndexException, IOException { + + MockConcurrentMerger other; + + set ( "1000 100 10 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1000" ); + + other = new MockConcurrentMerger( "2124" ); + policy.optimize( merger.segmentInfos ); + policy.mergeWait(); + assertEquals( other.toString(), merger.toString() ); + + set ( "x1000" ); + + other = new MockConcurrentMerger( "1000" ); + policy.optimize( merger.segmentInfos ); + policy.mergeWait(); + assertEquals( other.toString(), merger.toString() ); + + set ( "1000 100 10 1" ); + + other = new MockConcurrentMerger( "1111" ); + policy.optimize( merger.segmentInfos ); + policy.mergeWait(); + assertEquals( other.toString(), merger.toString() ); + + } + + public void testDirs() + throws CorruptIndexException, IOException { + + MockConcurrentMerger other; + + set ( "100 10 1" ); + + other = new MockConcurrentMerger( "100 10 1" ); + assertEquals( other.toString(), merger.toString() ); + + other = new MockConcurrentMerger( "100 x10 1" ); + assertNotEquals( other.toString(), merger.toString() ); + + set ( "100 x10 1" ); + policy.merge( merger.segmentInfos ); + + other = new MockConcurrentMerger( "100 10 1" ); + assertEquals( other.toString(), merger.toString() ); + + } + + public void testInconsistent() { + + set( "1001 1000 1001 1000 101 11 1" ); + assertTrue( policy.isConsistent( merger.segmentInfos ) ); + + set( "1001 1000 1001 101 1000 101 11 1" ); + assertFalse( policy.isConsistent( merger.segmentInfos ) ); + + set( "1001 11 101 1" ); + assertFalse( policy.isConsistent( merger.segmentInfos ) ); + + set( "1001 101 11" ); + assertTrue( policy.isConsistent( merger.segmentInfos ) ); + + set( "100001 10001 1001 101 11" ); + try { + policy.isConsistent( merger.segmentInfos ); + fail(); + } catch ( IllegalArgumentException e ) { + } + + } + + public void testLowestConsistentBound() { + + set( "101 101 1001 101 1" ); + assertTrue( policy.lowestConsistentBound( merger.segmentInfos ) == 10000 ); + + set( "10000 1000 100 10 1 100 10 1" ); + assertTrue( policy.lowestConsistentBound( merger.segmentInfos ) == 1000 ); + + set( "1000 1000 100 10 1 1000" ); + assertTrue( policy.lowestConsistentBound( 
merger.segmentInfos ) == 10000 ); + + set( "1000 100 10 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1000" ); + assertTrue( policy.lowestConsistentBound( merger.segmentInfos ) == 10000 ); + + } + +} Property changes on: src/test/org/apache/lucene/index/TestConcurrentLogarithmicMergePolicy.java ___________________________________________________________________ Name: svn:eol-style + native Index: src/test/org/apache/lucene/index/TestCase.java =================================================================== --- src/test/org/apache/lucene/index/TestCase.java (revision 0) +++ src/test/org/apache/lucene/index/TestCase.java (revision 0) @@ -0,0 +1,13 @@ +package org.apache.lucene.index; + +public class TestCase extends junit.framework.TestCase { + + public void assertNotEquals( Object a, Object b ) { + assertFalse( a.equals( b ) ); + } + + public void testTestCase() { + assertNotEquals( "0", "1" ); + } + +} Property changes on: src/test/org/apache/lucene/index/TestCase.java ___________________________________________________________________ Name: svn:eol-style + native Index: src/test/org/apache/lucene/index/TestIndexWriter.java =================================================================== --- src/test/org/apache/lucene/index/TestIndexWriter.java (revision 521898) +++ src/test/org/apache/lucene/index/TestIndexWriter.java (working copy) @@ -120,7 +120,7 @@ int NUM_DIR = 50; int END_COUNT = START_COUNT + NUM_DIR*25; - boolean debug = false; + boolean debug = true; // Build up a bunch of dirs that have indexes which we // will then merge together by calling addIndexes(*): @@ -517,6 +517,9 @@ // commits on windows): public void testCreateWithReader() throws IOException { String tempDir = System.getProperty("java.io.tmpdir"); + if (tempDir == null) { + tempDir = "./test"; + } if (tempDir == null) throw new IOException("java.io.tmpdir undefined, cannot run test"); File indexDir = new File(tempDir, "lucenetestindexwriter"); @@ -554,6 +557,9 @@ // that takes File: public void testCreateWithReader2() throws IOException { String tempDir = System.getProperty("java.io.tmpdir"); + if (tempDir == null) { + tempDir = "./test"; + } if (tempDir == null) throw new IOException("java.io.tmpdir undefined, cannot run test"); File indexDir = new File(tempDir, "lucenetestindexwriter"); @@ -587,6 +593,9 @@ // that takes String: public void testCreateWithReader3() throws IOException { String tempDir = System.getProperty("tempDir"); + if (tempDir == null) { + tempDir = "./test"; + } if (tempDir == null) throw new IOException("java.io.tmpdir undefined, cannot run test"); Index: src/test/org/apache/lucene/index/MockDirectory.java =================================================================== --- src/test/org/apache/lucene/index/MockDirectory.java (revision 0) +++ src/test/org/apache/lucene/index/MockDirectory.java (revision 0) @@ -0,0 +1,34 @@ +package org.apache.lucene.index; + +import org.apache.lucene.store.FSDirectory; + +import java.util.Map; +import java.util.HashMap; + +class MockDirectory extends FSDirectory { + + static Map instances = new HashMap(); + + String name; + + static MockDirectory the( String name ) { + if ( !instances.containsKey( name ) ) { + instances.put( name, new MockDirectory ( name ) ); + } + return (MockDirectory)instances.get( name ); + } + + static MockDirectory the() { + return the(""); + } + + private MockDirectory( String name ) { + this.name = name; + } + + public String[] list() { + return new String[0]; + } + +} + Property changes on: src/test/org/apache/lucene/index/MockDirectory.java 
___________________________________________________________________ Name: svn:eol-style + native Index: src/test/org/apache/lucene/index/TestMockDirectory.java =================================================================== --- src/test/org/apache/lucene/index/TestMockDirectory.java (revision 0) +++ src/test/org/apache/lucene/index/TestMockDirectory.java (revision 0) @@ -0,0 +1,13 @@ +package org.apache.lucene.index; + +public class TestMockDirectory extends TestCase { + + public void testMockDirectory() { + + assertTrue( MockDirectory.the() == MockDirectory.the() ); + assertFalse( MockDirectory.the("a") == MockDirectory.the() ); + assertTrue( MockDirectory.the("a") == MockDirectory.the("a") ); + + } + +} Property changes on: src/test/org/apache/lucene/index/TestMockDirectory.java ___________________________________________________________________ Name: svn:eol-style + native Index: src/test/org/apache/lucene/index/TestAddIndexesNoOptimize.java =================================================================== --- src/test/org/apache/lucene/index/TestAddIndexesNoOptimize.java (revision 521898) +++ src/test/org/apache/lucene/index/TestAddIndexesNoOptimize.java (working copy) @@ -13,6 +13,7 @@ import org.apache.lucene.store.RAMDirectory; public class TestAddIndexesNoOptimize extends TestCase { + public void testSimpleCase() throws IOException { // main directory Directory dir = new RAMDirectory(); @@ -255,7 +256,7 @@ writer.addIndexesNoOptimize(new Directory[] { aux, aux }); assertEquals(1020, writer.docCount()); - assertEquals(2, writer.getSegmentCount()); + assertEquals(3, writer.getSegmentCount()); assertEquals(1000, writer.getDocCount(0)); writer.close(); Index: src/test/org/apache/lucene/index/TestMockMerger.java =================================================================== --- src/test/org/apache/lucene/index/TestMockMerger.java (revision 0) +++ src/test/org/apache/lucene/index/TestMockMerger.java (revision 0) @@ -0,0 +1,66 @@ +package org.apache.lucene.index; + +import java.io.IOException; + +public class TestMockMerger extends TestCase { + + public void testMockMerger() { + + assertEquals( new MockMerger( "1001 101 11 1" ).toString(), + new MockMerger( "1001 101 11 1" ).toString() ); + + assertNotEquals( new MockMerger( "1001 101 11 1" ).toString(), + new MockMerger( "1001 101 11 11" ).toString() ); + + } + + SegmentInfos segmentInfos( SegmentInfos all, int first, int last ) { + SegmentInfos subset = new SegmentInfos(); + subset.addAll( all.subList( first, last ) ); + return subset; + } + + public void testUCF() throws IOException { + + MockMerger merger = new MockMerger( "c1001 C101" ); + + assertTrue( merger.segmentInfos.info(0).getUseCompoundFile() ); + assertFalse( merger.segmentInfos.info(1).getUseCompoundFile() ); + + MergePolicy.MergeSpecification spec = new MergePolicy.MergeSpecification(); + spec.segments = segmentInfos( merger.segmentInfos, 0, 1 ); + spec.useCompoundFile = true; + + merger.merge( spec ); + + assertTrue( merger.segmentInfos.info(0).getUseCompoundFile() ); + assertFalse( merger.segmentInfos.info(1).getUseCompoundFile() ); + + spec.segments = segmentInfos( merger.segmentInfos, 0, 1 ); + spec.useCompoundFile = false; + + merger.merge( spec ); + + assertFalse( merger.segmentInfos.info(0).getUseCompoundFile() ); + assertFalse( merger.segmentInfos.info(1).getUseCompoundFile() ); + + spec.segments = segmentInfos( merger.segmentInfos, 1, 2 ); + spec.useCompoundFile = false; + + merger.merge( spec ); + + assertFalse( 
merger.segmentInfos.info(0).getUseCompoundFile() ); + assertFalse( merger.segmentInfos.info(1).getUseCompoundFile() ); + + spec.segments = segmentInfos( merger.segmentInfos, 1, 2 ); + spec.useCompoundFile = true; + + merger.merge( spec ); + + assertFalse( merger.segmentInfos.info(0).getUseCompoundFile() ); + assertTrue( merger.segmentInfos.info(1).getUseCompoundFile() ); + + + } + +} Property changes on: src/test/org/apache/lucene/index/TestMockMerger.java ___________________________________________________________________ Name: svn:eol-style + native Index: src/test/org/apache/lucene/index/MockMerger.java =================================================================== --- src/test/org/apache/lucene/index/MockMerger.java (revision 0) +++ src/test/org/apache/lucene/index/MockMerger.java (revision 0) @@ -0,0 +1,202 @@ +package org.apache.lucene.index; + +import org.apache.lucene.store.Directory; + +import java.io.IOException; + +class MockMerger implements Merger { + + MergePolicy policy; + + void setMergePolicy( MergePolicy policy ) { + this.policy = policy; + } + + public synchronized void optimize() + throws CorruptIndexException, IOException { + policy.optimize( segmentInfos ); + } + + public synchronized void merge() + throws CorruptIndexException, IOException { + policy.merge( segmentInfos ); + } + + MockMerger( String string ) { + + String[] strings = string.split( "\\s" ); + + segmentInfos = new SegmentInfos(); + + for( int i = 0; i < strings.length; i++ ) { + + StringBuffer buffer = new StringBuffer( strings[i] ); + + boolean isCompoundFile = true; + boolean hasSingleNormsFile = true; + String dir = ""; + + while ( buffer.charAt(0) < '0' || buffer.charAt(0) > '9' ) { + + char c = buffer.charAt(0); + buffer.deleteCharAt( 0 ); + + switch(c) { + case 'c': isCompoundFile = true; break; + case 'C': isCompoundFile = false; break; + case 'x': dir = "x"; break; + case 'y': dir = "y"; break; + case 'z': dir = "z"; break; + } + + } + + int size = Integer.parseInt( buffer.toString() ); + + SegmentInfo info = new SegmentInfo( "name" + i, + size, + MockDirectory.the( dir ) ); + info.setUseCompoundFile( isCompoundFile ); + segmentInfos.addElement( info ); + + } + } + + public void add( MockMerger other ) { + add( copy( other.segmentInfos ) ); + } + + protected void merge( SegmentInfo segment ) { + } + + void add( SegmentInfos segments ) { + synchronized( segmentInfos ) { + segmentInfos.addAll( segments ); + } + } + + static SegmentInfos copy( SegmentInfos segmentInfos ) { + SegmentInfos segments = new SegmentInfos(); + synchronized( segmentInfos ) { + segments.addAll( segmentInfos ); + } + return segments; + } + + void replace( MergePolicy.MergeSpecification spec, SegmentInfo info ) { + synchronized( segmentInfos ) { + + int first = segmentInfos.indexOf( spec.segments.info( 0 ) ); + int last = + segmentInfos.indexOf( spec.segments.info( spec.segments.size() - 1 ) ); + last++; + + // System.err.println( "*before " + segString( segmentInfos ) ); + // System.err.println( "first " + first ); + // System.err.println( "last " + last ); + + if ( !( first >= 0 ) || + !( last >= 1 ) || + !( last - first == spec.segments.size() ) ) { + throw new RuntimeException( "bad replace spec" ); + } + + segmentInfos.subList( first, last ).clear(); + segmentInfos.add( first, info ); + + // System.err.println( "*after " + segString( segmentInfos ) ); + + } + } + + public int merge( MergePolicy.MergeSpecification spec ) + throws CorruptIndexException, IOException { + + // System.err.println( "merge " + this ); + // 
System.err.println( "merge spec " + segString( spec ) ); + + SegmentInfo info = new SegmentInfo( "name", 0, directory ); + info.setUseCompoundFile( spec.useCompoundFile ); + + SegmentInfos segments = copy( spec.segments ); + + int docCount = 0; + + for( int i = 0; i < segments.size(); i++ ) { + merge( segments.info( i ) ); + docCount += segments.info( i ).docCount; + } + + info.docCount = docCount; + + replace( spec, info ); + + // System.err.println( "result " + this ); + + return docCount; + } + + public Directory getDirectory() { + return directory; + } + + SegmentInfos segmentInfos; + Directory directory = MockDirectory.the(); + + public String segString( SegmentInfos segments, + int first, + int beyond ) { + + segments = copy( segments ); + + StringBuffer buffer = new StringBuffer(); + + for( int i = first; i < beyond; i++ ) { + + if ( i > first ) { + buffer.append( ' ' ); + } + + buffer.append( '[' ); + buffer.append( i ); + buffer.append( ']' ); + + SegmentInfo info = segments.info( i ); + + try { + if ( info.getUseCompoundFile() ) { + buffer.append( 'c' ); + } else { + buffer.append( 'C' ); + } + } catch (Exception e) { + } + + if ( info.dir != getDirectory() ) { + buffer.append( 'x' ); + } + + buffer.append( info.docCount ); + + } + + return buffer.toString(); + + } + + public String segString( SegmentInfos segments ) { + synchronized( segments ) { + return segString( segments, 0, segments.size() ); + } + } + + public String segString( MergePolicy.MergeSpecification spec ) { + return segString( spec.segments ); + } + + public String toString() { + return segString( segmentInfos ); + } + +} + Property changes on: src/test/org/apache/lucene/index/MockMerger.java ___________________________________________________________________ Name: svn:eol-style + native Index: src/test/org/apache/lucene/index/MockConcurrentMerger.java =================================================================== --- src/test/org/apache/lucene/index/MockConcurrentMerger.java (revision 0) +++ src/test/org/apache/lucene/index/MockConcurrentMerger.java (revision 0) @@ -0,0 +1,21 @@ +package org.apache.lucene.index; + +import java.io.IOException; +import java.lang.Thread; +import java.lang.InterruptedException; + +class MockConcurrentMerger extends MockMerger { + + MockConcurrentMerger( String string ) { + super( string ); + } + + protected void merge( SegmentInfo segment ) { + try { + Thread.sleep( segment.docCount/10 ); + } catch ( InterruptedException ie ) { + } + } + +} + Property changes on: src/test/org/apache/lucene/index/MockConcurrentMerger.java ___________________________________________________________________ Name: svn:eol-style + native Index: src/java/org/apache/lucene/index/LogarithmicMergePolicy.java =================================================================== --- src/java/org/apache/lucene/index/LogarithmicMergePolicy.java (revision 0) +++ src/java/org/apache/lucene/index/LogarithmicMergePolicy.java (revision 0) @@ -0,0 +1,740 @@ +package org.apache.lucene.index; + +import java.io.IOException; +import java.util.Vector; + +/* Logarithmic merge policy + * + * The LMP maintains segments as increasing (from the right) groups of + * segments where the number of documents in the exponentinally + * increaing size in the number of documents. + * + * The LMP has three parameters: + * + * minMergeDocs: this is the lowest number of documents in a segment + * (not including single doc segments). 
+ +class LogarithmicMergePolicy extends MergePolicyBase + implements LegacyMergePolicy { + + public LogarithmicMergePolicy(final Merger merger) { + super(merger); + } + + private boolean useCompoundFile = true; + + public void setUseCompoundFile( boolean useCompoundFile ) { + this.useCompoundFile = useCompoundFile; + } + + public boolean getUseCompoundFile() { + return useCompoundFile; + } + + private int minMergeDocs = DEFAULT_MAX_BUFFERED_DOCS; + + public void setMinMergeDocs( int minMergeDocs ) { + if (minMergeDocs < 1) + throw new IllegalArgumentException("minMergeDocs must be at least 1"); + this.minMergeDocs = minMergeDocs; + } + + public int getMinMergeDocs() { + return minMergeDocs; + } + + private int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS; + + public void setMaxMergeDocs( int maxMergeDocs ) { + if (maxMergeDocs < 2) + throw new IllegalArgumentException("maxMergeDocs must be at least 2"); + this.maxMergeDocs = maxMergeDocs; + } + + public int getMaxMergeDocs() { + return maxMergeDocs; + } + + private int mergeFactor = DEFAULT_MERGE_FACTOR; + + public void setMergeFactor( int mergeFactor ) { + if (mergeFactor < 2) + throw new IllegalArgumentException("mergeFactor must be at least 2"); + this.mergeFactor = mergeFactor; + } + + public int getMergeFactor() { + return mergeFactor; + } + + /* a ConsistencyState object wraps the merge invariants: + * + * 1) the list of segments, viewed from the end, is + * monotonically increasing in the log of the number of + * documents (not including deleted documents). + * + * 2) all segments are owned by the destination directory. + * + * Inconsistencies are measured in three ways: + * + * lowestConsistentBound: if not zero, it is the lowest logarithmic + * level above which logarithmic monotonicity holds.
E.g., assuming + * minMergeDocs = mergeFactor = 10: + * LCB( 100 10 1 ) == 0 + * LCB( 100 1 10 ) == 100 + * LCB( 1 10 100 ) == 1000 + * + * firstInconsistentSegment: when the invariants aren't met, this is + * the index of, well, the first inconsistent segment, e.g. (same givens as above): + * FIS( 100 1 10 ) == 1 + * + * inconsistentDirectories: a list of directories not contained by + * the merge which need to be copied if not otherwise merged + * + * Notes: + * + * Consistency in this context does not include checking that the number of segments at a level is <= M. + * + * TODO: decide/check boundary condition below minMergeDocs + */ + + class ConsistencyState { + + Vector inconsistentDirectories = new Vector(); + int lowestConsistentBound = 0; + int firstInconsistentSegment = -1; + + ConsistencyState( SegmentInfos segmentInfos ) { + + int currentBound = 1; + int nextBound = minMergeDocs; + + synchronized( segmentInfos ) { + + for ( int i = segmentInfos.size()-1; i >=0; i-- ) { + + SegmentInfo info = segmentInfos.info(i); + + if ( info.dir != merger.getDirectory() ) { + inconsistentDirectories.add( 0, new Integer( i ) ); + } + + int docCount = info.docCount; + + if ( docCount < currentBound ) { + lowestConsistentBound = nextBound; + firstInconsistentSegment = i; + } + + while ( docCount >= nextBound ) { + + currentBound = nextBound; + nextBound *= mergeFactor; + + if ( nextBound > maxMergeDocs ) { + throw new IllegalArgumentException("No segment size can exceed maxMergeDocs"); + } + } + + } + + } + + } + + boolean isConsistent() { + return lowestConsistentBound == 0 && inconsistentDirectories.isEmpty(); + } + + }
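[Editor's note] A worked trace of the constructor above, using a case from testLowestConsistentBound ("10000 1000 100 10 1 100 10 1", minMergeDocs = mergeFactor = 10): scanning right to left, the trailing 1, 10, 100 climb the bounds to [100, 1000); the next 1 and 10 (indexes 4, then 3) each fall below currentBound = 100, setting lowestConsistentBound = nextBound = 1000 and leaving firstInconsistentSegment = 3; the leading 1000 and 10000 then climb cleanly. In test form:

    set( "10000 1000 100 10 1 100 10 1" );
    assertEquals( 1000, policy.lowestConsistentBound( merger.segmentInfos ) );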
+ + int lowestConsistentBound( SegmentInfos segmentInfos ) { + return new ConsistencyState( segmentInfos ).lowestConsistentBound; + } + + protected int merge( MergeSpecification spec ) + throws MergeException, CorruptIndexException, IOException { + int docCount = merger.merge( spec ); + return docCount; + } + + SegmentInfos segmentInfos( SegmentInfos all, int first, int last ) { + SegmentInfos subset = new SegmentInfos(); + synchronized( all ) { + subset.addAll( all.subList( first, last ) ); + } + return subset; + } + + static SegmentInfos copy( SegmentInfos segmentInfos ) { + SegmentInfos result = new SegmentInfos(); + synchronized( segmentInfos ) { + result.addAll( segmentInfos ); + } + return result; + } + + /* The primary function for handling merges is cascadingMerge. It + * takes a consistent list of segments and generates primitive + * merges until the logarithmic invariants are met. + * + * It is meant to handle the cascading up of merges when a segment + * added to a list causes several levels to fill in turn. + * + * This function takes two parameters that override the "default" + * behaviour: + * + * firstBound: this parameter effectively overrides minMergeDocs for + * one call, the result being that all segments with fewer than + * firstBound docs will be considered to be at the same level. This + * behaviour is used when making an inconsistent index consistent: + * cascadingMerge is called with firstBound = lowestConsistentBound. + * + * firstSegmentToConsider: this is used to manually set the left + * boundary of the subsequence. It also causes a merge even if the + * number of segments that would be merged is less than + * mergeFactor. It is used as part of the process of making an index + * consistent. + * + * Differences from the non-factored version: + * + * Log boundary conditions (see above) + * + * Processing of non-minimum-sized segments: this function, rather + * than starting at minimum-sized segments and stopping if none are + * found, looks at the lowest level found and checks it, stopping if + * it is okay. This is designed to complement the consistency checker, + * which handles lists where levels are not contiguous but does not + * look at levels which are over-populated but logarithmically + * monotonic. + * + * Notes on concurrency: if the concurrent merge work goes forward, + * this function needs to be changed structurally. When concurrent + * merges can occur, segmentInfos can change over time, and in ways + * unpredictable from the current thread. In particular, during a merge + * segmentInfos can change a lot, and thus the loop below, which + * expects to do multiple merges based on data gleaned before + * beginning the merges, probably needs to change. + */ + + public void cascadingMerge( SegmentInfos segmentInfos, + int firstBound, + int firstSegmentToConsider ) + throws MergeException, CorruptIndexException, IOException { + + long currentBound = 1; + long nextBound = firstBound; + + /* Determine the bounds for the smallest existing segment */ + + int firstCount = segmentInfos.info( segmentInfos.size()-1 ).docCount; + while ( firstCount >= nextBound ) { + currentBound = nextBound; + nextBound *= mergeFactor; + } + + /* find the contiguous subsequence of segments on this level, + * searching from the right */ + + while ( nextBound < maxMergeDocs ) { + + /* starts like this: */ + /* abcdefg */ + /*r l*/ + /* and iterates by pulling the left pointer towards the left */ + + int left = segmentInfos.size(); + int right = -1; + + /* search from the right ... */ + + while( --left >= 0 ) { + + int docCount = segmentInfos.info( left ).docCount; + + // System.err.println( "dc " + docCount + " cB " + currentBound + " nB " + nextBound ); + + /* first time a segment is in bound, set right */ + + if ( right == -1 && docCount >= currentBound && docCount < nextBound ) { + right = left; + } + + /* first time a segment is above bound, stop, leaving left where it is */ + + if ( docCount >= nextBound ) { + break; + } + + } + + int first = left + 1; + + if ( firstSegmentToConsider >= 0 ) { + first = firstSegmentToConsider; + } + + int length = right - first + 1; + + /* intoNextLevel is used to determine how to handle the results + * of a merge. If the merge results in a segment in the next + * level, a cascade will ensue. Otherwise, the merge has created + * a segment at the same level as the previous segments and is a + * candidate for further merging at that level, if appropriate. + */ + + boolean intoNextLevel = false; + + MergeSpecification spec = new MergeSpecification(); + spec.useCompoundFile = useCompoundFile; + + while ( length > 0 ) { + + int mergeLength = mergeFactor; + + /* Normally, if the length of the subsequence is less than + * mergeFactor, no merge will occur. This is modified if first + * has been overridden, as described above. + */ + + if ( length < mergeLength ) { + if ( firstSegmentToConsider == -1 ) { + break; + } + mergeLength = length; + } + + int last = first + mergeLength; + + // System.err.println( "before " + segString( segmentInfos ) ); + // System.err.println( first + " - " + last );
+ + /* Create the subsequence. Note this creates a new segmentInfos + * (so it's not == to the one owned by the merger). This + * should help in concurrency but is not alone sufficient. + */ + + spec.segments = segmentInfos( segmentInfos, first, last ); + + int docCount = 0; + + try { + docCount = merge( spec ); + } catch ( CorruptIndexException cie ) { + System.err.println( "cie " + cie ); + cie.printStackTrace(); + throw cie; + } catch ( IOException ioe ) { + System.err.println( "ioe " + ioe ); + ioe.printStackTrace(); + throw ioe; + } + + + // System.err.println( "after " + segString( segmentInfos ) ); + + length -= mergeLength; + + firstSegmentToConsider = -1; + + if ( docCount == -1 ) { + + /* If the merge ends up being concurrent, we don't know the + * number of documents that will result. So we just look at + * the rest of the segments. Note, though, that the bump by + * mergeLength assumes that segmentInfos hasn't been updated + * before this code is run, which is not safe. That's an + * example of where this needs to be tweaked for + * concurrency. Partly this could be handled by going to a + * recursive rather than iterative structure. But there + * are other issues that need to be explored / understood ... + */ + + first += mergeLength; + + } else if ( docCount >= nextBound ) { + + /* If the new segment is into the next level, record that + * fact and continue with other segments at this level. */ + + first++; + intoNextLevel = true; + } else { + + /* The new segment is still in the current level, so just + * add it back to this level */ + + length++; + } + + } + + /* At this point, there aren't mergeFactor segments left to be + * merged at this level. If at some point we created a segment + * at the next level, we need to check it, so we'll fall through + * the break. Otherwise, the break fires, indicating that no + * further merges are needed to ensure consistency (as long as + * we were already consistent). + */ + if ( !intoNextLevel ) { + break; + } + + currentBound = nextBound; + nextBound *= mergeFactor; + + } + + } + + public void cascadingMerge( SegmentInfos segmentInfos, int firstBound ) + throws MergeException, CorruptIndexException, IOException { + cascadingMerge( segmentInfos, firstBound, /* firstSegmentToConsider = */ -1 ); + } + + public void cascadingMerge( SegmentInfos segmentInfos ) + throws MergeException, CorruptIndexException, IOException { + cascadingMerge( segmentInfos, + /* firstBound = */ minMergeDocs, + /* firstSegmentToConsider = */ -1 ); + } + + public boolean isConsistent( SegmentInfos segmentInfos ) { + return ( (new ConsistencyState( segmentInfos ) ).isConsistent() ); + } + + /* Use the merger to copy a sequence of segments */ + + void copy( SegmentInfos segmentInfos, int first, int last ) + throws MergeException, CorruptIndexException, IOException { + + MergeSpecification spec = new MergeSpecification(); + spec.segments = segmentInfos( segmentInfos, first, last ); + spec.useCompoundFile = useCompoundFile; + + merge( spec ); + + }
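[Editor's note] cascadingMerge in action, restating the first case of testChangeMergeFactor above as a sketch (the expected result is the one that test asserts):

    // 26 ten-doc segments, mergeFactor 5: five merges of five produce
    // five 50-doc segments, which cascade into one 250-doc segment;
    // the 26th segment is left over at the lowest level
    MockMerger merger = new MockMerger(
        "10 10 10 10 10 10 10 10 10 10 10 10 10 " +
        "10 10 10 10 10 10 10 10 10 10 10 10 10" );
    LogarithmicMergePolicy policy = new LogarithmicMergePolicy( merger );
    policy.setMaxMergeDocs( 100000 );
    policy.setMergeFactor( 5 );
    policy.merge( merger.segmentInfos );
    // merger.toString() -> "[0]c250 [1]c10"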
+ + /* This is the "alternative" to cascadingMerge. It is used to take + * an inconsistent list of segments and make it consistent. It + * uses cascadingMerge and copy to do this. + * + * This code is based closely on the non-factored version. + * + * Differences from the non-factored version: + * + * In the non-factored version, IndexWriter#addIndexesNoOptimize + * has most of the code for making an index consistent. It's + * necessary in that function since its purpose is to aggregate a + * number of indexes, in which case it's highly unlikely the + * resulting sequence will follow the logarithmic + * invariants. However, there are other reasons an index will not be + * consistent. It may well not be consistent if parameters + * change. Deletes can also cause merges that result in + * inconsistency (I think). + * + * makeConsistent is supposed to take an inconsistent list and make + * it consistent. It does this without regard for what caused the + * inconsistency. Because it makes fewer assumptions, the algorithm + * is slightly different than addIndexesNoOptimize and the results + * can be different in a few cases (though the invariants will still + * hold). + */ + + boolean makeConsistent( SegmentInfos segmentInfos ) + throws MergeException, CorruptIndexException, IOException { + + ConsistencyState state = new ConsistencyState( segmentInfos ); + + if ( state.isConsistent() ) { + return false; + } + + /* If the segments are ramSegments, merge them all. This is + * required because IndexWriter assumes that all the segments will + * be merged in this case. It's probably better if the merge + * policy is not even exposed to this case. + */ + + if ( segmentInfos.info(0).name.startsWith( "_ram_" ) ) { + copy( segmentInfos, 0, segmentInfos.size() ); + return true; + } + + /* If the segment list is not composed of contiguous subsequences + * at monotonically increasing levels, use cascadingMerge with + * firstBound == the lowest consistent bound. This will divide + * the list into two pieces: a (possibly empty) consistent set on + * the left and an inconsistent set on the right, e.g., + * + * 1000 1000 10 100 10 + * < con > < incon > + * + * Starting with the left-most inconsistent segment, + * cascadingMerge will merge mergeFactor segments at a time using + * its normal algorithm for the given level. + */ + + if ( state.lowestConsistentBound > 0 ) { + cascadingMerge( segmentInfos, state.lowestConsistentBound ); + state = new ConsistencyState( segmentInfos ); + } + + /* cascadingMerge will stop when there are fewer than mergeFactor + * segments left at a level, but there may still be inconsistency in + * the last segments, of which there may be up to mergeFactor - 1. + * For example, we could be left with + * + * 1000 100 100 10 100 + * + * with M = 4. Since the first inconsistency is only one from the end, + * it will not get merged. + * + * We use cascadingMerge again, but force it to merge these + * segments by specifying the firstInconsistentSegment. + */ + + if ( state.lowestConsistentBound > 0 ) { + cascadingMerge( segmentInfos, + state.lowestConsistentBound, + state.firstInconsistentSegment ); + } + + /* All of these merges will (should?) have left the sequence + * consistent, but there may be further possible merges. Try them: */ + + cascadingMerge( segmentInfos ); + + state = new ConsistencyState( segmentInfos ); + + /* Finally, it's possible this merger doesn't even own some of + * these segments. If all of the earlier merges left some external + * segments untouched, copy them in unchanged (except for + * garbage-collected deleted docs). */ + + if ( state.inconsistentDirectories.size() > 0 ) { + + for( int i = 0; i < state.inconsistentDirectories.size(); i++ ) { + + int segment = ((Integer)(state.inconsistentDirectories.elementAt( i ))).intValue(); + copy( segmentInfos, segment, segment+1 ); + } + + /* The copy could have generated segments that, through deleted-doc + * gc, no longer obey the invariants. So just try the whole thing again. + * This might be overkill? Or maybe we should actually + * be doing this in more cases?
+ */ + + makeConsistent( segmentInfos ); + + } + + return true; + + } + + /* This is an artifact of the way deleted docs are handled in + * IndexWriter. It requests merges of ramSegments even though they + * are empty. This should probably be handled differently in IW. + */ + + public boolean checkForEmptySegmentList( SegmentInfos segmentInfos ) + throws MergeException, CorruptIndexException, IOException { + + if ( segmentInfos.size() < 1 ) { + + MergeSpecification spec = new MergeSpecification(); + spec.segments = segmentInfos( segmentInfos, 0, 0 ); + spec.useCompoundFile = useCompoundFile; + + merge( spec ); + + return true; + + } + + return false; + + } + + /* MergeException is a placeholder for other exceptions in derived + * classes. LMP never throws it, so if it catches one it turns it into + * a runtime error, which it is. + */ + + class MergeException extends Exception { + } + + protected void checkedMerge( SegmentInfos segmentInfos ) + throws MergeException, CorruptIndexException, IOException { + + // System.err.println( "merge " + segString( segmentInfos ) ); + + if ( checkForEmptySegmentList( segmentInfos ) ) { + return; + } + + /* Since any merge can result in inconsistency, we have to run + * makeConsistent afterwards anyway. Would it be better to combine + * the code and put it in a while loop? Would that be too much + * extra computation? Not sure yet ... + */ + + if ( !makeConsistent( segmentInfos ) ) { + cascadingMerge( segmentInfos, minMergeDocs ); + makeConsistent( segmentInfos ); + } + + // System.err.println( "merge result " + segString( segmentInfos ) ); + + } + + public void merge( SegmentInfos segmentInfos ) + throws CorruptIndexException, IOException { + try { + checkedMerge( segmentInfos ); + } catch ( MergeException me ) { + throw new RuntimeException ( "merge exception" ); + } + } + + protected void checkedOptimize( SegmentInfos segmentInfos ) + throws MergeException, CorruptIndexException, IOException { + + if ( segmentInfos.size() > 0 ) { + + while ( ( segmentInfos.size() > 1 ) || + SegmentReader.hasDeletions(segmentInfos.info(0)) || + SegmentReader.hasSeparateNorms(segmentInfos.info(0)) || + ( segmentInfos.info(0).dir != merger.getDirectory() ) || + ( useCompoundFile && (!SegmentReader.usesCompoundFile(segmentInfos.info(0)))) ) { + + MergeSpecification spec = new MergeSpecification(); + + int first = segmentInfos.size() - mergeFactor; + first = first < 0 ?
0 : first; + int last = segmentInfos.size(); + + spec.segments = segmentInfos( segmentInfos, first, last ); + spec.useCompoundFile = useCompoundFile; + + merge( spec ); + + } + + } + + } + + public void optimize( SegmentInfos segmentInfos ) + throws CorruptIndexException, IOException { + try { + checkedOptimize( segmentInfos ); + } catch ( MergeException me ) { + throw new RuntimeException ( "merge exception" ); + } + } + + static public String segString( SegmentInfos segmentInfos, + int first, + int beyond, + Merger merger ) { + + StringBuffer buffer = new StringBuffer(); + + for( int i = first; i < beyond; i++ ) { + + if ( i > first ) { + buffer.append( ' ' ); + } + + buffer.append( '[' ); + buffer.append( i ); + buffer.append( ']' ); + + SegmentInfo info = segmentInfos.info( i ); + + try { + if ( info.getUseCompoundFile() ) { + buffer.append( 'c' ); + } else { + buffer.append( 'C' ); + } + } catch (Exception e) { // ignore: compound-file flag unavailable + } + + if ( info.dir != merger.getDirectory() ) { + buffer.append( 'x' ); + } + + buffer.append( info.docCount ); + + } + + return buffer.toString(); + + } + + public String segString( SegmentInfos segmentInfos, + int first, + int beyond ) { + + return segString( segmentInfos, first, beyond, merger ); + } + + public String segString( SegmentInfos segmentInfos ) { + return segString( segmentInfos, 0, segmentInfos.size() ); + } + + static public String segString( SegmentInfos segmentInfos, Merger merger ) { + return segString( segmentInfos, 0, segmentInfos.size(), merger ); + } + + public String segString( MergeSpecification spec ) { + return segString( spec.segments ); + } + + static public String segString( MergeSpecification spec, Merger merger ) { + return segString( spec.segments, merger ); + } + +} Property changes on: src/java/org/apache/lucene/index/LogarithmicMergePolicy.java ___________________________________________________________________ Name: svn:eol-style + native
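[Editor's note] The consistency machinery end to end, restating a case from testInconsistentMerge above: the trailing 1000-doc segment breaks monotonicity, so merge() goes through makeConsistent rather than a plain cascade (the expected result is the one that test asserts):

    MockMerger merger = new MockMerger(
        "1000 100 10 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1000" );
    LogarithmicMergePolicy policy = new LogarithmicMergePolicy( merger );
    policy.setMaxMergeDocs( 100000 );
    policy.merge( merger.segmentInfos );
    // merger.toString() -> "[0]c1117 [1]c1007"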
Index: src/java/org/apache/lucene/index/ConcurrentLogarithmicMergePolicy.java =================================================================== --- src/java/org/apache/lucene/index/ConcurrentLogarithmicMergePolicy.java (revision 0) +++ src/java/org/apache/lucene/index/ConcurrentLogarithmicMergePolicy.java (revision 0) @@ -0,0 +1,269 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.Vector; +import java.util.Map; +import java.util.HashMap; + +/* + * This class maintains the same merge policy as the + * LogarithmicMergePolicy but uses threads to allow, in some cases, + * other IndexWriter operations to continue while a merge is + * occurring. In fact, multiple merges may occur in parallel as long as + * they don't conflict. + * + * To be considered: + * + * May want to bound the number of threads. + * + * May want to prioritize threads. + * + * It's possible that a merge may start in a thread, and then become + * unnecessary because of a rollback. Perhaps this could be checked by + * periodically checking that the segments being merged are still in + * the segment list. + * + * At the completion of a thread, more merges may become possible, + * e.g., a cascade. In the serial case, this is done in a loop. Should + * the CLMP call back to the merger to request another merge? + * + */ + +class ConcurrentLogarithmicMergePolicy extends LogarithmicMergePolicy + implements ConcurrentMergePolicy { + + private int minConcurrentMerge = DEFAULT_MIN_CONCURRENT_MERGE; + + public void setMinConcurrentMerge( int minConcurrentMerge ) { + this.minConcurrentMerge = minConcurrentMerge; + } + + public int getMinConcurrentMerge() { + return minConcurrentMerge; + } + + public ConcurrentLogarithmicMergePolicy(final Merger merger) { + super(merger); + } + + int numberOfDocuments ( SegmentInfos segmentInfos ) { + int number = 0; + for ( int i = 0; i < segmentInfos.size(); i++ ) { + number += segmentInfos.info(i).docCount; + } + return number; + } + + Map reserved = new HashMap(); + + class Reservation { + MergeSpecification spec; + + Reservation( MergeSpecification spec ) { + this.spec = spec.copy(); + } + + void close() { + release( spec ); + } + } + + Reservation reserve( MergeSpecification spec ) { + + // System.err.println( "reserve " + segString( spec ) ); + + Reservation reservation = null; + + synchronized ( reserved ) { + + for ( int segment = 0; segment < spec.segments.size(); segment++ ) { + if ( reserved.containsKey( spec.segments.info( segment ) ) ) { + return reservation; // conflict: some segment is already being merged + } + } + + for ( int segment = 0; segment < spec.segments.size(); segment++ ) { + reserved.put( spec.segments.info( segment ), spec.segments.info( segment ) ); + } + } + + return new Reservation( spec ); + } + + void release( MergeSpecification spec ) { + + synchronized ( reserved ) { + for ( int segment = 0; segment < spec.segments.size(); segment++ ) { + reserved.remove( spec.segments.info( segment ) ); + } + + reserved.notifyAll(); + } + + } + + boolean concurrentMerge( MergeSpecification spec ) + throws CorruptIndexException, IOException { + + if ( spec.segments.info(0).name.startsWith( "_ram_" ) ) { + return false; + } + + if ( numberOfDocuments( spec.segments ) < minConcurrentMerge ) { + return false; + } + + return true; + } + + class ConcurrentMerge implements Runnable { + + Reservation reservation; + + ConcurrentMerge( Reservation reservation ) { + this.reservation = reservation; + } + + public void run() { + // System.err.println( "starting thread " + Thread.currentThread().getName() + " " + segString( reservation.spec.segments ) ); + try { + + MergeSpecification spec = reservation.spec; + + for ( int segment = 0; segment < spec.segments.size(); segment++ ) { // re-assert the reservations made by reserve() (idempotent) + // System.err.println( "owns " + Thread.currentThread().getName() + " " + spec.segments.info( segment ) ); + reserved.put( spec.segments.info( segment ), spec.segments.info( segment ) ); + } + + merger.merge( reservation.spec ); + } catch ( CorruptIndexException cie ) { // TODO: surface merge-thread failures + } catch ( IOException ioe ) { // TODO: surface merge-thread failures + } finally { + reservation.close(); + } + try { + merger.merge(); // a completed merge may enable a cascade; ask the merger to check + } catch ( IOException ioe ) { + } + // System.err.println( "finishing thread " + Thread.currentThread().getName() + " " + segString( reservation.spec.segments ) ); + } + } + + class Conflict extends MergeException {} + + protected int
merge( MergeSpecification spec ) + throws MergeException, CorruptIndexException, IOException { + + // System.err.println( "concurrent " + segString( spec ) ); + // System.err.println( spec.segments ); + // System.err.println( spec.segments.info(0) ); + + boolean concurrent = concurrentMerge( spec ); + + Reservation reservation = null; + + int docCount = -1; + + try { + + if ( concurrent && ( ( reservation = reserve( spec ) ) == null ) ) { + throw new Conflict(); + } + + if ( concurrent ) { + new Thread( new ConcurrentMerge( reservation ) ).start(); + reservation = null; + } else { + docCount = merger.merge( spec ); + } + + } finally { + if ( reservation != null ) { + reservation.close(); + reservation = null; + } + } + + // System.err.println( "concurrent after " + segString( spec.segments ) ); + + return docCount; + } + + public void merge( SegmentInfos segmentInfos ) + throws CorruptIndexException, IOException + { + boolean successful = false; + while ( !successful ) { + try { + checkedMerge( segmentInfos ); + successful = true; + } catch ( Conflict conflict ) { + conflictWait(); + } catch ( MergeException me ) { + throw new RuntimeException ( "merge exception" ); + } + } + } + + public void optimize( SegmentInfos segmentInfos ) + throws CorruptIndexException, IOException { + boolean successful = false; + while ( !successful ) { + try { + checkedOptimize( segmentInfos ); + successful = true; + } catch ( Conflict conflict ) { + conflictWait(); + } catch ( MergeException me ) { + throw new RuntimeException ( "merge exception" ); + } + } + } + + public void mergeWait() { + // System.err.println( "* waiting ..." ); + synchronized ( reserved ) { + while( !reserved.isEmpty() ) { + try { + reserved.wait(); + } catch ( InterruptedException ie ) { + } + } + } + // System.err.println( "* awake ..." 
); + } + + public void close() { + mergeWait(); + } + + public void conflictWait() { + // System.err.println( "waiting" ); + synchronized ( reserved ) { + try { + reserved.wait(); + } catch ( InterruptedException ie ) { + } + } + // System.err.println( "awake" ); + } + +} + Property changes on: src/java/org/apache/lucene/index/ConcurrentLogarithmicMergePolicy.java ___________________________________________________________________ Name: svn:eol-style + native Index: src/java/org/apache/lucene/index/MergePolicyBase.java =================================================================== --- src/java/org/apache/lucene/index/MergePolicyBase.java (revision 0) +++ src/java/org/apache/lucene/index/MergePolicyBase.java (revision 0) @@ -0,0 +1,14 @@ +package org.apache.lucene.index; + +public abstract class MergePolicyBase implements MergePolicy { + + protected final Merger merger; + + protected MergePolicyBase( Merger merger ) { + this.merger = merger; + } + + public void close() { + } + +} Property changes on: src/java/org/apache/lucene/index/MergePolicyBase.java ___________________________________________________________________ Name: svn:eol-style + native Index: src/java/org/apache/lucene/index/MergePolicy.java =================================================================== --- src/java/org/apache/lucene/index/MergePolicy.java (revision 0) +++ src/java/org/apache/lucene/index/MergePolicy.java (revision 0) @@ -0,0 +1,51 @@ +package org.apache.lucene.index; + +import java.io.IOException; + +interface MergePolicy { + + public static final int DEFAULT_MAX_BUFFERED_DOCS = 10; + + void setMinMergeDocs( int minMergeDocs ); + int getMinMergeDocs(); + + /* A merge specification gives a segmentInfos object which is the + * subsequence of segments to merge into a new segment. It's up to the + * merger to do the merge and to update its own segmentInfos. + */ + + static class MergeSpecification implements Cloneable { + SegmentInfos segments; + boolean useCompoundFile; + public MergeSpecification copy() { + try { + return (MergeSpecification)super.clone(); + } catch ( CloneNotSupportedException cnse ) { + throw new RuntimeException ( "clone not supported" ); + } + } + } + + /* Merge a sequence of segments. The policy will call a merger to + * perform primitive merges using specifications. The policy decides + * what merges to perform. + */ + void merge( SegmentInfos segmentInfos ) + throws CorruptIndexException, IOException; + + /* Optimize a sequence of segments. The policy decides what merges to + * perform. + */ + void optimize( SegmentInfos segmentInfos ) + throws CorruptIndexException, IOException; + + /* Release resources */ + void close(); + + /* Notes: a merge policy piggybacks on the synchronization of the + * caller (presumably the merger, e.g., IndexWriter). Non-concurrent + * mergers need to do little internal synchronization. + */ + +} Property changes on: src/java/org/apache/lucene/index/MergePolicy.java ___________________________________________________________________ Name: svn:eol-style + native Index: src/java/org/apache/lucene/index/IndexWriter.java =================================================================== --- src/java/org/apache/lucene/index/IndexWriter.java (revision 521898) +++ src/java/org/apache/lucene/index/IndexWriter.java (working copy) @@ -150,7 +150,7 @@ * referenced by the "front" of the index). For this, IndexFileDeleter * keeps track of the last non commit checkpoint.
*/ -public class IndexWriter { +public class IndexWriter implements Merger { /** * Default value for the write lock timeout (1,000). @@ -166,11 +166,6 @@ public static final String WRITE_LOCK_NAME = "write.lock"; /** - * Default value is 10. Change using {@link #setMergeFactor(int)}. - */ - public final static int DEFAULT_MERGE_FACTOR = 10; - - /** * Default value is 10. Change using {@link #setMaxBufferedDocs(int)}. */ public final static int DEFAULT_MAX_BUFFERED_DOCS = 10; @@ -181,11 +176,6 @@ public final static int DEFAULT_MAX_BUFFERED_DELETE_TERMS = 1000; /** - * Default value is {@link Integer#MAX_VALUE}. Change using {@link #setMaxMergeDocs(int)}. - */ - public final static int DEFAULT_MAX_MERGE_DOCS = Integer.MAX_VALUE; - - /** * Default value is 10,000. Change using {@link #setMaxFieldLength(int)}. */ public final static int DEFAULT_MAX_FIELD_LENGTH = 10000; @@ -226,12 +216,6 @@ private HashMap bufferedDeleteTerms = new HashMap(); private int numBufferedDeleteTerms = 0; - /** Use compound file setting. Defaults to true, minimizing the number of - * files used. Setting this to false may improve indexing performance, but - * may also cause file handle problems. - */ - private boolean useCompoundFile = true; - private boolean closeDir; private boolean closed; @@ -248,19 +232,23 @@ * Note that this just returns the value you set with setUseCompoundFile(boolean) * or the default. You cannot use this to query the status of an existing index. * @see #setUseCompoundFile(boolean) + * + * @deprecated + * */ public boolean getUseCompoundFile() { - ensureOpen(); - return useCompoundFile; + return ((LegacyMergePolicy)getMergePolicy()).getUseCompoundFile(); } /** Setting to turn on usage of a compound file. When on, multiple files * for each segment are merged into a single file once the segment creation * is finished. This is done regardless of what directory is in use. + * + * @deprecated + * */ public void setUseCompoundFile(boolean value) { - ensureOpen(); - useCompoundFile = value; + ((LegacyMergePolicy)getMergePolicy()).setUseCompoundFile( value ); } /** Expert: Set the Similarity implementation used by this IndexWriter. @@ -615,24 +603,38 @@ } } + private MergePolicy mergePolicy = new LogarithmicMergePolicy( this ); + + public void setMergePolicy( MergePolicy mp ) { + mergePolicy = mp; + } + + public MergePolicy getMergePolicy() { + return mergePolicy; + } + /** Determines the largest number of documents ever merged by addDocument(). * Small values (e.g., less than 10,000) are best for interactive indexing, * as this limits the length of pauses while indexing to a few seconds. * Larger values are best for batched indexing and speedier searches. * *
<p>
The default value is {@link Integer#MAX_VALUE}. + * + * @deprecated + * */ public void setMaxMergeDocs(int maxMergeDocs) { - ensureOpen(); - this.maxMergeDocs = maxMergeDocs; + ((LegacyMergePolicy)getMergePolicy()).setMaxMergeDocs( maxMergeDocs ); } /** * @see #setMaxMergeDocs + * + * @deprecated + * */ public int getMaxMergeDocs() { - ensureOpen(); - return maxMergeDocs; + return ((LegacyMergePolicy)getMergePolicy()).getMaxMergeDocs(); } /** @@ -669,20 +671,25 @@ *
<p>
The default value is 10. * * @throws IllegalArgumentException if maxBufferedDocs is smaller than 2 + * + * @deprecated + * */ public void setMaxBufferedDocs(int maxBufferedDocs) { ensureOpen(); if (maxBufferedDocs < 2) throw new IllegalArgumentException("maxBufferedDocs must at least be 2"); - this.minMergeDocs = maxBufferedDocs; + getMergePolicy().setMinMergeDocs( maxBufferedDocs ); } /** * @see #setMaxBufferedDocs + * + * @deprecated + * */ public int getMaxBufferedDocs() { - ensureOpen(); - return minMergeDocs; + return getMergePolicy().getMinMergeDocs(); } /** @@ -718,20 +725,22 @@ * interactively maintained. * *
<p>
This must never be less than 2. The default value is 10. + * + * @deprecated + * */ public void setMergeFactor(int mergeFactor) { - ensureOpen(); - if (mergeFactor < 2) - throw new IllegalArgumentException("mergeFactor cannot be less than 2"); - this.mergeFactor = mergeFactor; + ((LegacyMergePolicy)getMergePolicy()).setMergeFactor( mergeFactor ); } /** * @see #setMergeFactor + * + * @deprecated + * */ public int getMergeFactor() { - ensureOpen(); - return mergeFactor; + return ((LegacyMergePolicy)getMergePolicy()).getMergeFactor(); } /** If non-null, this will be the default infoStream used @@ -833,7 +842,8 @@ * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ - public synchronized void close() throws CorruptIndexException, IOException { + public synchronized void close() + throws CorruptIndexException, IOException { if (!closed) { flushRamSegments(); @@ -845,6 +855,7 @@ } ramDirectory.close(); + mergePolicy.close(); if (writeLock != null) { writeLock.release(); // release write lock writeLock = null; @@ -926,7 +937,7 @@ * occurs, the method will take more time to run (possibly * a long time if the index is large), and will require * free temporary space in the Directory to do the - * merging.
<p>
+ * merging.
<p>
FIXME * *
<p>
The amount of free space required when a merge is * triggered is up to 1X the size of all segments being @@ -938,12 +949,13 @@ * together), but whenever a full merge occurs (all * segments in the index, which is the worst case for * temporary space usage) then the maximum free disk space - * required is the same as {@link #optimize}.
<p>
+ * required is the same as {@link #optimize}.
<p>
FIXME * * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ - public void addDocument(Document doc) throws CorruptIndexException, IOException { + public void addDocument(Document doc) + throws CorruptIndexException, IOException { addDocument(doc, analyzer); } @@ -960,7 +972,8 @@ * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ - public void addDocument(Document doc, Analyzer analyzer) throws CorruptIndexException, IOException { + public void addDocument(Document doc, Analyzer analyzer) + throws CorruptIndexException, IOException { ensureOpen(); SegmentInfo newSegmentInfo = buildSingleDocSegment(doc, analyzer); synchronized (this) { @@ -986,7 +999,8 @@ * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ - public synchronized void deleteDocuments(Term term) throws CorruptIndexException, IOException { + public synchronized void deleteDocuments(Term term) + throws CorruptIndexException, IOException { ensureOpen(); bufferDeleteTerm(term); maybeFlushRamSegments(); @@ -1000,7 +1014,8 @@ * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ - public synchronized void deleteDocuments(Term[] terms) throws CorruptIndexException, IOException { + public synchronized void deleteDocuments(Term[] terms) + throws CorruptIndexException, IOException { ensureOpen(); for (int i = 0; i < terms.length; i++) { bufferDeleteTerm(terms[i]); @@ -1020,7 +1035,8 @@ * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ - public void updateDocument(Term term, Document doc) throws CorruptIndexException, IOException { + public void updateDocument(Term term, Document doc) + throws CorruptIndexException, IOException { ensureOpen(); updateDocument(term, doc, getAnalyzer()); } @@ -1076,19 +1092,6 @@ return "_" + Integer.toString(segmentInfos.counter++, Character.MAX_RADIX); } - /** Determines how often segment indices are merged by addDocument(). With - * smaller values, less RAM is used while indexing, and searches on - * unoptimized indices are faster, but indexing speed is slower. With larger - * values, more RAM is used during indexing, and while searches on unoptimized - * indices are slower, indexing is faster. Thus larger values (> 10) are best - * for batch index creation, and smaller values (< 10) for indices that are - * interactively maintained. - * - *
<p>
This must never be less than 2. The default value is {@link #DEFAULT_MERGE_FACTOR}. - - */ - private int mergeFactor = DEFAULT_MERGE_FACTOR; - /** Determines the minimal number of documents required before the buffered * in-memory documents are merging and a new Segment is created. * Since Documents are merged in a {@link org.apache.lucene.store.RAMDirectory}, @@ -1098,24 +1101,15 @@ *
<p>
The default value is {@link #DEFAULT_MAX_BUFFERED_DOCS}. */ - private int minMergeDocs = DEFAULT_MAX_BUFFERED_DOCS; + //private int minMergeDocs = DEFAULT_MAX_BUFFERED_DOCS; - /** Determines the largest number of documents ever merged by addDocument(). - * Small values (e.g., less than 10,000) are best for interactive indexing, - * as this limits the length of pauses while indexing to a few seconds. - * Larger values are best for batched indexing and speedier searches. - * - *
<p>
The default value is {@link #DEFAULT_MAX_MERGE_DOCS}. - - */ - private int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS; - /** If non-null, information about merges will be printed to this. */ private PrintStream infoStream = null; private static PrintStream defaultInfoStream = null; + // private static PrintStream defaultInfoStream = System.err; /** Merges all segments together into a single segment, * optimizing an index for search. @@ -1178,18 +1172,14 @@ public synchronized void optimize() throws CorruptIndexException, IOException { ensureOpen(); flushRamSegments(); - while (segmentInfos.size() > 1 || - (segmentInfos.size() == 1 && - (SegmentReader.hasDeletions(segmentInfos.info(0)) || - SegmentReader.hasSeparateNorms(segmentInfos.info(0)) || - segmentInfos.info(0).dir != directory || - (useCompoundFile && - (!SegmentReader.usesCompoundFile(segmentInfos.info(0))))))) { - int minSegment = segmentInfos.size() - mergeFactor; - mergeSegments(segmentInfos, minSegment < 0 ? 0 : minSegment, segmentInfos.size()); - } + mergePolicy.optimize( segmentInfos ); } + public synchronized void merge() throws CorruptIndexException, IOException { + flushRamSegments(); + mergePolicy.merge( segmentInfos ); + } + /* * Begin a transaction. During a transaction, any segment * merges that happen (or ram segments flushed) will not @@ -1222,8 +1212,10 @@ // of its SegmentInfo instances. This is so the next // attempt to commit using this instance of IndexWriter // will always write to a new generation ("write once"). + // System.err.println( "before rollback " + LogarithmicMergePolicy.segString( segmentInfos, this ) ); segmentInfos.clear(); segmentInfos.addAll(localRollbackSegmentInfos); + // System.err.println( "after rollback " + LogarithmicMergePolicy.segString( segmentInfos, this ) ); localRollbackSegmentInfos = null; // Ask deleter to locate unreferenced files we had @@ -1239,6 +1231,8 @@ */ private void commitTransaction() throws IOException { + // System.err.println( "before commit " + LogarithmicMergePolicy.segString( segmentInfos, this ) ); + // First restore autoCommit in case we hit an exception below: autoCommit = localAutoCommit; @@ -1255,6 +1249,8 @@ // Give deleter a chance to remove files now: deleter.checkpoint(segmentInfos, autoCommit); + + // System.err.println( "after commit " + LogarithmicMergePolicy.segString( segmentInfos, this ) ); } /** @@ -1361,6 +1357,8 @@ public synchronized void addIndexes(Directory[] dirs) throws CorruptIndexException, IOException { + // System.err.println( "start aIs. 
ac = " + autoCommit + " lAC = " + localAutoCommit ); + ensureOpen(); optimize(); // start with zero or 1 seg @@ -1379,15 +1377,8 @@ } } - // merge newly added segments in log(n) passes - while (segmentInfos.size() > start+mergeFactor) { - for (int base = start; base < segmentInfos.size(); base++) { - int end = Math.min(segmentInfos.size(), base+mergeFactor); - if (end-base > 1) { - mergeSegments(segmentInfos, base, end); - } - } - } + mergePolicy.merge( segmentInfos ); + success = true; } finally { if (success) { @@ -1451,7 +1442,6 @@ flushRamSegments(); // 2 copy segment infos and find the highest level from dirs - int startUpperBound = minMergeDocs; boolean success = false; @@ -1470,64 +1460,13 @@ for (int j = 0; j < sis.size(); j++) { SegmentInfo info = sis.info(j); segmentInfos.addElement(info); // add each info - - while (startUpperBound < info.docCount) { - startUpperBound *= mergeFactor; // find the highest level from dirs - if (startUpperBound > maxMergeDocs) { - // upper bound cannot exceed maxMergeDocs - throw new IllegalArgumentException("Upper bound cannot exceed maxMergeDocs"); - } - } } } - // 3 maybe merge segments starting from the highest level from dirs - maybeMergeSegments(startUpperBound); + mergePolicy.merge( segmentInfos ); - // get the tail segments whose levels <= h - int segmentCount = segmentInfos.size(); - int numTailSegments = 0; - while (numTailSegments < segmentCount - && startUpperBound >= segmentInfos.info(segmentCount - 1 - numTailSegments).docCount) { - numTailSegments++; - } - if (numTailSegments == 0) { - success = true; - return; - } - - // 4 make sure invariants hold for the tail segments whose levels <= h - if (checkNonDecreasingLevels(segmentCount - numTailSegments)) { - // identify the segments from S to be copied (not merged in 3) - int numSegmentsToCopy = 0; - while (numSegmentsToCopy < segmentCount - && directory != segmentInfos.info(segmentCount - 1 - numSegmentsToCopy).dir) { - numSegmentsToCopy++; - } - if (numSegmentsToCopy == 0) { - success = true; - return; - } - - // copy those segments from S - for (int i = segmentCount - numSegmentsToCopy; i < segmentCount; i++) { - mergeSegments(segmentInfos, i, i + 1); - } - if (checkNonDecreasingLevels(segmentCount - numSegmentsToCopy)) { - success = true; - return; - } - } - - // invariants do not hold, simply merge those segments - mergeSegments(segmentInfos, segmentCount - numTailSegments, segmentCount); - - // maybe merge segments again if necessary - if (segmentInfos.info(segmentInfos.size() - 1).docCount > startUpperBound) { - maybeMergeSegments(startUpperBound * mergeFactor); - } - success = true; + } finally { if (success) { commitTransaction(); @@ -1600,7 +1539,7 @@ } } - if (useCompoundFile) { + if (getUseCompoundFile()) { boolean success = false; @@ -1660,19 +1599,22 @@ throws IOException { } - protected final void maybeFlushRamSegments() throws CorruptIndexException, IOException { + protected final void maybeFlushRamSegments() + throws CorruptIndexException, IOException { // A flush is triggered if enough new documents are buffered or // if enough delete terms are buffered - if (ramSegmentInfos.size() >= minMergeDocs || numBufferedDeleteTerms >= maxBufferedDeleteTerms) { + if (ramSegmentInfos.size() >= mergePolicy.getMinMergeDocs() || + numBufferedDeleteTerms >= maxBufferedDeleteTerms) { flushRamSegments(); } } /** Expert: Flushes all RAM-resident segments (buffered documents), then may merge segments. 
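+ * (The rewritten body below makes two policy calls: the first hands
+ * the policy the buffered ram segments to flush; the second hands it
+ * the on-disk segmentInfos so any cascading merges can run.)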
*/ - private final synchronized void flushRamSegments() throws CorruptIndexException, IOException { + private final synchronized void flushRamSegments() + throws CorruptIndexException, IOException { if (ramSegmentInfos.size() > 0 || bufferedDeleteTerms.size() > 0) { - mergeSegments(ramSegmentInfos, 0, ramSegmentInfos.size()); - maybeMergeSegments(minMergeDocs); + mergePolicy.merge( ramSegmentInfos ); + mergePolicy.merge( segmentInfos ); } } @@ -1684,7 +1626,8 @@ * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ - public final synchronized void flush() throws CorruptIndexException, IOException { + public final synchronized void flush() + throws CorruptIndexException, IOException { ensureOpen(); flushRamSegments(); } @@ -1705,77 +1648,51 @@ return ramSegmentInfos.size(); } - /** Incremental segment merger. */ - private final void maybeMergeSegments(int startUpperBound) throws CorruptIndexException, IOException { - long lowerBound = -1; - long upperBound = startUpperBound; + boolean ramSegments( SegmentInfos segments ) { + return segments.size() == 0 || segments.info(0).name.startsWith( "_ram_" ); + } - while (upperBound < maxMergeDocs) { - int minSegment = segmentInfos.size(); - int maxSegment = -1; + void replace( MergePolicy.MergeSpecification spec, SegmentInfo info ) { + synchronized( segmentInfos ) { - // find merge-worthy segments - while (--minSegment >= 0) { - SegmentInfo si = segmentInfos.info(minSegment); + int first = segmentInfos.indexOf( spec.segments.info( 0 ) ); + int last = + segmentInfos.indexOf( spec.segments.info( spec.segments.size() - 1 ) ); + last++; - if (maxSegment == -1 && si.docCount > lowerBound && si.docCount <= upperBound) { - // start from the rightmost* segment whose doc count is in bounds - maxSegment = minSegment; - } else if (si.docCount > upperBound) { - // until the segment whose doc count exceeds upperBound - break; - } + // System.err.println( "*before " + LogarithmicMergePolicy.segString( segmentInfos, this ) ); + // System.err.println( "first " + first ); + // System.err.println( "last " + last ); + + if ( !( first >= 0 ) || + !( last >= 1 ) || + !( last - first == spec.segments.size() ) ) { + throw new RuntimeException( "bad replace spec" ); } - minSegment++; - maxSegment++; - int numSegments = maxSegment - minSegment; + segmentInfos.subList( first, last ).clear(); + segmentInfos.add( first, info ); - if (numSegments < mergeFactor) { - break; - } else { - boolean exceedsUpperLimit = false; + // System.err.println( "*after " + LogarithmicMergePolicy.segString( segmentInfos, this ) ); - // number of merge-worthy segments may exceed mergeFactor when - // mergeFactor and/or maxBufferedDocs change(s) - while (numSegments >= mergeFactor) { - // merge the leftmost* mergeFactor segments - - int docCount = mergeSegments(segmentInfos, minSegment, minSegment + mergeFactor); - numSegments -= mergeFactor; - - if (docCount > upperBound) { - // continue to merge the rest of the worthy segments on this level - minSegment++; - exceedsUpperLimit = true; - } else { - // if the merged segment does not exceed upperBound, consider - // this segment for further merges on this same level - numSegments++; - } - } - - if (!exceedsUpperLimit) { - // if none of the merged segments exceed upperBound, done - break; - } - } - - lowerBound = upperBound; - upperBound *= mergeFactor; } } - /** - * Merges the named range of segments, replacing them in the stack with a - * single segment. 
- */ - private final int mergeSegments(SegmentInfos sourceSegments, int minSegment, int end) + public final int merge( MergePolicy.MergeSpecification spec ) throws CorruptIndexException, IOException { + // System.err.println( "IW merge < " + LogarithmicMergePolicy.segString( segmentInfos, this ) ); + // System.err.println( "IW merge * " + LogarithmicMergePolicy.segString( spec.segmentInfos, this ) ); + + SegmentInfos sourceSegments = spec.segments; + int minSegment = 0; + int end = sourceSegments.size(); + boolean useCompoundFile = spec.useCompoundFile; + // We may be called solely because there are deletes // pending, in which case doMerge is false: boolean doMerge = end > 0; + final String mergedName = newSegmentName(); SegmentMerger merger = null; @@ -1822,25 +1739,28 @@ newSegment = new SegmentInfo(mergedName, mergedDocCount, directory, false, true); } + - if (sourceSegments != ramSegmentInfos || anyDeletes) { + if (!ramSegments(sourceSegments) || anyDeletes) { // Now save the SegmentInfo instances that // we are replacing: rollback = (SegmentInfos) segmentInfos.clone(); } if (doMerge) { - if (sourceSegments == ramSegmentInfos) { + if (ramSegments(sourceSegments)) { segmentInfos.addElement(newSegment); } else { - for (int i = end-1; i > minSegment; i--) // remove old infos & add new - sourceSegments.remove(i); + replace( spec, newSegment ); - segmentInfos.set(minSegment, newSegment); + // for (int i = end-1; i > minSegment; i--) // remove old infos & add new + // sourceSegments.remove(i); + + // segmentInfos.set(minSegment, newSegment); } } - if (sourceSegments == ramSegmentInfos) { + if (ramSegments(sourceSegments)) { maybeApplyDeletes(doMerge); doAfterFlush(); } @@ -1855,13 +1775,13 @@ // The non-ram-segments case is already committed // (above), so all the remains for ram segments case // is to clear the ram segments: - if (sourceSegments == ramSegmentInfos) { + if (ramSegments(sourceSegments)) { ramSegmentInfos.removeAllElements(); } } else { // Must rollback so our state matches index: - if (sourceSegments == ramSegmentInfos && !anyDeletes) { + if (ramSegments(sourceSegments) && !anyDeletes) { // Simple case: newSegment may or may not have // been added to the end of our segment infos, // so just check & remove if so: @@ -1917,6 +1837,8 @@ deleter.checkpoint(segmentInfos, autoCommit); } + // System.err.println( "IW merge > " + LogarithmicMergePolicy.segString( segmentInfos, this ) ); + return mergedDocCount; } @@ -1980,24 +1902,6 @@ } } - private final boolean checkNonDecreasingLevels(int start) { - int lowerBound = -1; - int upperBound = minMergeDocs; - - for (int i = segmentInfos.size() - 1; i >= start; i--) { - int docCount = segmentInfos.info(i).docCount; - if (docCount <= lowerBound) { - return false; - } - - while (docCount > upperBound) { - lowerBound = upperBound; - upperBound *= mergeFactor; - } - } - return true; - } - // For test purposes. 
final synchronized int getBufferedDeleteTermsSize() { return bufferedDeleteTerms.size(); Index: src/java/org/apache/lucene/index/LegacyMergePolicy.java =================================================================== --- src/java/org/apache/lucene/index/LegacyMergePolicy.java (revision 0) +++ src/java/org/apache/lucene/index/LegacyMergePolicy.java (revision 0) @@ -0,0 +1,17 @@ +package org.apache.lucene.index; + +interface LegacyMergePolicy extends MergePolicy { + + public static final int DEFAULT_MERGE_FACTOR = 10; + public static final int DEFAULT_MAX_MERGE_DOCS = Integer.MAX_VALUE; + + void setMaxMergeDocs( int maxMergeDocs ); + int getMaxMergeDocs(); + + void setMergeFactor( int mergeFactor ); + int getMergeFactor(); + + void setUseCompoundFile( boolean useCompoundFile ); + boolean getUseCompoundFile(); + +} Property changes on: src/java/org/apache/lucene/index/LegacyMergePolicy.java ___________________________________________________________________ Name: svn:eol-style + native Index: src/java/org/apache/lucene/index/IndexFileDeleter.java =================================================================== --- src/java/org/apache/lucene/index/IndexFileDeleter.java (revision 521898) +++ src/java/org/apache/lucene/index/IndexFileDeleter.java (working copy) @@ -295,6 +295,12 @@ if (infoStream != null) { message("now checkpoint \"" + segmentInfos.getCurrentSegmentFileName() + "\" [isCommit = " + isCommit + "]"); + if ( false ) try { + throw new RuntimeException( "help" ); + } catch ( RuntimeException re ) { + System.err.println( "lastFiles " + lastFiles ); + re.printStackTrace(); + } } // Try again now to delete any previously un-deletable Index: src/java/org/apache/lucene/index/Merger.java =================================================================== --- src/java/org/apache/lucene/index/Merger.java (revision 0) +++ src/java/org/apache/lucene/index/Merger.java (revision 0) @@ -0,0 +1,39 @@ +package org.apache.lucene.index; + +import org.apache.lucene.store.Directory; + +import java.io.IOException; + +/* This is the interface a MergePolicy needs of an object that will do + * primitive merges. Currently only implemented by IndexWriter. + */ + +interface Merger { + + /* The primary primitive merge operation */ + + int merge( MergePolicy.MergeSpecification m ) + throws CorruptIndexException, IOException; + + /* Identifies the directory a merge is using so that the merge + * policy can determine if segments need to be copied in + */ + Directory getDirectory(); + + /* These functions are (would be) used by the merge policy to chain + * operations, in other words, to ask the merger to perform the + * operation from the top. In particular, they are used by + * concurrent merge threads to chain the next level of operation + * since their completion may enable further merges. These merges + * cannot be started from within the concurrent policy itself + * because these threads operate without a lock on the + * merger. These methods function to regain that lock.
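+ *
+ * For illustration only (a hypothetical sketch, not part of this
+ * patch), this is how a concurrent merge thread chains once its
+ * primitive merge completes (compare ConcurrentMerge.run() in
+ * ConcurrentLogarithmicMergePolicy):
+ *
+ *   merger.merge( spec );  // primitive merge, done without the lock
+ *   reservation.close();   // release the reserved segments
+ *   merger.merge();        // re-enter through the merger to regain
+ *                          // the lock and let the policy cascade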
+ */ + + void optimize() + throws CorruptIndexException, IOException; + + void merge() + throws CorruptIndexException, IOException; + +} Property changes on: src/java/org/apache/lucene/index/Merger.java ___________________________________________________________________ Name: svn:eol-style + native Index: src/java/org/apache/lucene/index/ConcurrentMergePolicy.java =================================================================== --- src/java/org/apache/lucene/index/ConcurrentMergePolicy.java (revision 0) +++ src/java/org/apache/lucene/index/ConcurrentMergePolicy.java (revision 0) @@ -0,0 +1,14 @@ +package org.apache.lucene.index; + +import java.io.IOException; + +interface ConcurrentMergePolicy extends MergePolicy { + + public static final int DEFAULT_MIN_CONCURRENT_MERGE = 100; + + void setMinConcurrentMerge( int minConcurrentMerge ); + int getMinConcurrentMerge(); + + void mergeWait(); + +} Property changes on: src/java/org/apache/lucene/index/ConcurrentMergePolicy.java ___________________________________________________________________ Name: svn:eol-style + native Index: src/java/org/apache/lucene/index/IndexModifier.java =================================================================== --- src/java/org/apache/lucene/index/IndexModifier.java (revision 521898) +++ src/java/org/apache/lucene/index/IndexModifier.java (working copy) @@ -104,7 +104,7 @@ protected boolean useCompoundFile = true; protected int maxBufferedDocs = IndexWriter.DEFAULT_MAX_BUFFERED_DOCS; protected int maxFieldLength = IndexWriter.DEFAULT_MAX_FIELD_LENGTH; - protected int mergeFactor = IndexWriter.DEFAULT_MERGE_FACTOR; + protected int mergeFactor = LegacyMergePolicy.DEFAULT_MERGE_FACTOR; /** * Open an index with write access. @@ -214,7 +214,8 @@ * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ - protected void createIndexReader() throws CorruptIndexException, IOException { + protected void createIndexReader() + throws CorruptIndexException, IOException { if (indexReader == null) { if (indexWriter != null) { indexWriter.close(); @@ -232,7 +233,8 @@ * be obtained) * @throws IOException if there is a low-level IO error */ - public void flush() throws CorruptIndexException, LockObtainFailedException, IOException { + public void flush() + throws CorruptIndexException, LockObtainFailedException, IOException { synchronized(directory) { assureOpen(); if (indexWriter != null) { @@ -260,7 +262,8 @@ * be obtained) * @throws IOException if there is a low-level IO error */ - public void addDocument(Document doc, Analyzer docAnalyzer) throws CorruptIndexException, LockObtainFailedException, IOException { + public void addDocument(Document doc, Analyzer docAnalyzer) + throws CorruptIndexException, LockObtainFailedException, IOException { synchronized(directory) { assureOpen(); createIndexWriter(); @@ -283,7 +286,8 @@ * be obtained) * @throws IOException if there is a low-level IO error */ - public void addDocument(Document doc) throws CorruptIndexException, LockObtainFailedException, IOException { + public void addDocument(Document doc) + throws CorruptIndexException, LockObtainFailedException, IOException { addDocument(doc, null); } @@ -304,7 +308,8 @@ * be obtained) * @throws IOException if there is a low-level IO error */ - public int deleteDocuments(Term term) throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException { + public int deleteDocuments(Term term) + throws StaleReaderException, CorruptIndexException, 
LockObtainFailedException, IOException { synchronized(directory) { assureOpen(); createIndexReader(); @@ -323,7 +328,8 @@ * be obtained) * @throws IllegalStateException if the index is closed */ - public void deleteDocument(int docNum) throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException { + public void deleteDocument(int docNum) + throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException { synchronized(directory) { assureOpen(); createIndexReader(); @@ -360,7 +366,8 @@ * be obtained) * @throws IOException if there is a low-level IO error */ - public void optimize() throws CorruptIndexException, LockObtainFailedException, IOException { + public void optimize() + throws CorruptIndexException, LockObtainFailedException, IOException { synchronized(directory) { assureOpen(); createIndexWriter(); Index: src/java/org/apache/lucene/store/RAMDirectory.java =================================================================== --- src/java/org/apache/lucene/store/RAMDirectory.java (revision 521898) +++ src/java/org/apache/lucene/store/RAMDirectory.java (working copy) @@ -183,6 +183,14 @@ ensureOpen(); RAMFile file = (RAMFile)fileMap.get(name); if (file!=null) { + if ( false ) + if ( name.equals( "_6.cfs" ) ) { + try { + throw new RuntimeException( "hell" ); + } catch ( RuntimeException re ) { + re.printStackTrace(); + } + } fileMap.remove(name); file.directory = null; sizeInBytes -= file.sizeInBytes; // updates to RAMFile.sizeInBytes synchronized on directory
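A minimal usage sketch (illustrative only, not part of the patch; it assumes code living in org.apache.lucene.index, since MergePolicy and the concrete policies are package-private, and the class name MergePolicyExample is hypothetical) showing how a writer is wired to the concurrent policy through the new setMergePolicy/Merger plumbing:

package org.apache.lucene.index;

import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.store.RAMDirectory;

public class MergePolicyExample {
  public static void main( String[] args ) throws Exception {
    RAMDirectory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter( dir, new SimpleAnalyzer(), true );

    // IndexWriter now implements Merger, so it can be handed to a policy.
    ConcurrentLogarithmicMergePolicy policy =
      new ConcurrentLogarithmicMergePolicy( writer );
    policy.setMergeFactor( 10 );
    policy.setMinConcurrentMerge( 100 ); // smaller merges stay serial
    writer.setMergePolicy( policy );

    // ... add documents ...

    // close() closes the policy, which waits out any running merges.
    writer.close();
  }
}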