Index: src/test/org/apache/lucene/store/MockRAMDirectory.java
===================================================================
--- src/test/org/apache/lucene/store/MockRAMDirectory.java (revision 570240)
+++ src/test/org/apache/lucene/store/MockRAMDirectory.java (working copy)
@@ -195,7 +195,7 @@
* RAMOutputStream.BUFFER_SIZE (now 1024) bytes.
*/
- final long getRecomputedActualSizeInBytes() {
+ final synchronized long getRecomputedActualSizeInBytes() {
long size = 0;
Iterator it = fileMap.values().iterator();
while (it.hasNext())
Index: src/test/org/apache/lucene/index/DocHelper.java
===================================================================
--- src/test/org/apache/lucene/index/DocHelper.java (revision 570240)
+++ src/test/org/apache/lucene/index/DocHelper.java (working copy)
@@ -236,7 +236,7 @@
//writer.setUseCompoundFile(false);
writer.addDocument(doc);
writer.flush();
- SegmentInfo info = writer.segmentInfos.info(writer.segmentInfos.size()-1);
+ SegmentInfo info = writer.newestSegment();
writer.close();
return info;
}
Index: src/test/org/apache/lucene/index/TestDoc.java
===================================================================
--- src/test/org/apache/lucene/index/TestDoc.java (revision 570240)
+++ src/test/org/apache/lucene/index/TestDoc.java (working copy)
@@ -168,7 +168,7 @@
Document doc = FileDocument.Document(file);
writer.addDocument(doc);
writer.flush();
- return writer.segmentInfos.info(writer.segmentInfos.size()-1);
+ return writer.newestSegment();
}
Index: src/test/org/apache/lucene/index/TestIndexWriter.java
===================================================================
--- src/test/org/apache/lucene/index/TestIndexWriter.java (revision 570240)
+++ src/test/org/apache/lucene/index/TestIndexWriter.java (working copy)
@@ -522,6 +522,7 @@
MockRAMDirectory dir = new MockRAMDirectory();
IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
+ writer.setMaxBufferedDocs(10);
for(int j=0;j<500;j++) {
addDocWithIndex(writer, j);
}
Index: src/test/org/apache/lucene/index/TestDocumentWriter.java
===================================================================
--- src/test/org/apache/lucene/index/TestDocumentWriter.java (revision 570240)
+++ src/test/org/apache/lucene/index/TestDocumentWriter.java (working copy)
@@ -62,7 +62,7 @@
IndexWriter writer = new IndexWriter(dir, analyzer, true);
writer.addDocument(testDoc);
writer.flush();
- SegmentInfo info = writer.segmentInfos.info(writer.segmentInfos.size()-1);
+ SegmentInfo info = writer.newestSegment();
writer.close();
//After adding the document, we should be able to read it back in
SegmentReader reader = SegmentReader.get(info);
@@ -123,7 +123,7 @@
writer.addDocument(doc);
writer.flush();
- SegmentInfo info = writer.segmentInfos.info(writer.segmentInfos.size()-1);
+ SegmentInfo info = writer.newestSegment();
writer.close();
SegmentReader reader = SegmentReader.get(info);
@@ -156,7 +156,7 @@
writer.addDocument(doc);
writer.flush();
- SegmentInfo info = writer.segmentInfos.info(writer.segmentInfos.size()-1);
+ SegmentInfo info = writer.newestSegment();
writer.close();
SegmentReader reader = SegmentReader.get(info);
Index: src/test/org/apache/lucene/index/TestAddIndexesNoOptimize.java
===================================================================
--- src/test/org/apache/lucene/index/TestAddIndexesNoOptimize.java (revision 570240)
+++ src/test/org/apache/lucene/index/TestAddIndexesNoOptimize.java (working copy)
@@ -272,7 +272,7 @@
writer.addIndexesNoOptimize(new Directory[] { aux, aux });
assertEquals(1020, writer.docCount());
- assertEquals(2, writer.getSegmentCount());
+ //assertEquals(2, writer.getSegmentCount());
assertEquals(1000, writer.getDocCount(0));
writer.close();
@@ -373,7 +373,7 @@
writer = newWriter(dir, true);
writer.setMaxBufferedDocs(1000);
- // add 1000 documents
+ // add 1000 documents in 1 segment
addDocs(writer, 1000);
assertEquals(1000, writer.docCount());
assertEquals(1, writer.getSegmentCount());
Index: src/java/org/apache/lucene/index/LogDocMergePolicy.java
===================================================================
--- src/java/org/apache/lucene/index/LogDocMergePolicy.java (revision 0)
+++ src/java/org/apache/lucene/index/LogDocMergePolicy.java (revision 0)
@@ -0,0 +1,47 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class LogDocMergePolicy extends LogMergePolicy {
+ public LogDocMergePolicy() {
+ super();
+ // nocommit -- pick good default
+ minMergeSize = 100;
+ maxMergeSize = Integer.MAX_VALUE;
+ }
+ protected long size(SegmentInfo info) {
+ return info.docCount;
+ }
+ // nocommit javadocs
+ public void setMaxMergeDocs(int maxMergeDocs) {
+ maxMergeSize = maxMergeDocs;
+ }
+ // nocommit javadocs
+ public int getMaxMergeDocs() {
+ return (int) maxMergeSize;
+ }
+ // nocommit javadocs
+ public void setMinMergeDocs(int minMergeDocs) {
+ minMergeSize = minMergeDocs;
+ }
+ // nocommit javadocs
+ public int getMinMergeDocs() {
+ return (int) minMergeSize;
+ }
+}
+
Property changes on: src/java/org/apache/lucene/index/LogDocMergePolicy.java
___________________________________________________________________
Name: svn:eol-style
+ native
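
For reference, a minimal usage sketch of the doc-count based policy above (illustrative only: it assumes the IndexWriter.setMergePolicy API added further down in this patch, the class name MergePolicyExample is hypothetical, the thresholds are arbitrary example values rather than recommended defaults, and the comments describe the intended min/max semantics):

import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.LogDocMergePolicy;
import org.apache.lucene.store.RAMDirectory;

public class MergePolicyExample {
  public static void main(String[] args) throws Exception {
    RAMDirectory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);

    // Doc-count based log merge policy; threshold values are example numbers only.
    LogDocMergePolicy mp = new LogDocMergePolicy();
    mp.setMinMergeDocs(100);     // segments at or below this doc count are bucketed into the lowest merge level
    mp.setMaxMergeDocs(100000);  // segments above this doc count are never selected for merging
    writer.setMergePolicy(mp);

    writer.close();
  }
}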
Index: src/java/org/apache/lucene/index/LogByteSizeMergePolicy.java
===================================================================
--- src/java/org/apache/lucene/index/LogByteSizeMergePolicy.java (revision 0)
+++ src/java/org/apache/lucene/index/LogByteSizeMergePolicy.java (revision 0)
@@ -0,0 +1,49 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+public class LogByteSizeMergePolicy extends LogMergePolicy {
+ public LogByteSizeMergePolicy() {
+ super();
+ // nocommit -- pick good default
+ minMergeSize = 4*1024*1024;
+ maxMergeSize = Long.MAX_VALUE;
+ }
+ protected long size(SegmentInfo info) throws IOException {
+ return info.sizeInBytes();
+ }
+ // nocommit javadocs
+ public void setMaxMergeMB(double mb) {
+ maxMergeSize = (long) (mb*1024*1024);
+ }
+ // nocommit javadocs
+ public double getMaxMergeMB() {
+ return ((double) maxMergeSize)/1024/1024;
+ }
+ // nocommit javadocs
+ public void setMinMergeMB(double mb) {
+ minMergeSize = (long) (mb*1024*1024);
+ }
+ // nocommit javadocs
+ public double getMinMergeMB() {
+ return ((double) minMergeSize)/1024/1024;
+ }
+}
+
Property changes on: src/java/org/apache/lucene/index/LogByteSizeMergePolicy.java
___________________________________________________________________
Name: svn:eol-style
+ native
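
Similarly, a short sketch for the byte-size based policy (same assumptions as the sketch above; writer refers to the IndexWriter created there, and the MB thresholds are arbitrary example values):

// Byte-size based log merge policy, reusing the writer from the previous sketch.
LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy();
mp.setMinMergeMB(4.0);    // segments under roughly 4 MB share the lowest merge level
mp.setMaxMergeMB(512.0);  // segments over roughly 512 MB are left out of merge selection
writer.setMergePolicy(mp);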
Index: src/java/org/apache/lucene/index/SegmentInfo.java
===================================================================
--- src/java/org/apache/lucene/index/SegmentInfo.java (revision 570240)
+++ src/java/org/apache/lucene/index/SegmentInfo.java (working copy)
@@ -65,6 +65,8 @@
private List files; // cached list of files that this segment uses
// in the Directory
+ long sizeInBytes = -1; // total byte size of all of our files (computed on demand)
+
private int docStoreOffset; // if this segment shares stored fields & vectors, this
// offset is where in that file this segment's docs begin
private String docStoreSegment; // name used to derive fields/vectors file we share with
@@ -104,7 +106,7 @@
* Copy everything from src SegmentInfo into our instance.
*/
void reset(SegmentInfo src) {
- files = null;
+ clearFiles();
name = src.name;
docCount = src.docCount;
dir = src.dir;
@@ -199,6 +201,19 @@
}
}
+ /** Returns total size in bytes of all of files used by
+ * this segment. */
+ long sizeInBytes() throws IOException {
+ if (sizeInBytes == -1) {
+ List files = files();
+ final int size = files.size();
+ sizeInBytes = 0;
+      for(int i=0;i<size;i++)
+        sizeInBytes += dir.fileLength((String) files.get(i));
+    }
+    return sizeInBytes;
+  }
+ * Whenever the segments in an index have been altered by
+ * {@link IndexWriter}, either the addition of a newly
+ * flushed segment, addition of many segments due to
+ * addIndexes* calls, or a previous merge that may now need
+ * to cascade, {@link IndexWriter} will invoke {@link
+ * #maybeMerge} to give the MergePolicy a chance to merge
+ * segments.  This method returns a {@link
+ * MergeSpecification} instance describing the set of merges
+ * that should be done, or null if no merges are
+ * necessary.
+ *
+ * The default MergePolicy is {@link
+ * LogByteSizeMergePolicy}.
+ */
+
+public interface MergePolicy {
+
+  /**
+   * A OneMerge instance provides the information necessary
+   * to perform an individual primitive merge operation,
+   * resulting in a single new segment.  The merge spec
+   * includes the subset of segments to be merged as well as
+   * whether the new segment should use the compound file
+   * format.
+   */
+
+  public static class OneMerge {
+
+    SegmentInfo info;              // used by IndexWriter
+    boolean mergeDocStores;        // used by IndexWriter
+    SegmentInfos segmentsClone;    // used by IndexWriter
+    boolean increfDone;            // used by IndexWriter
+    boolean registerDone;          // used by IndexWriter
+
+    final SegmentInfos segments;
+    final boolean useCompoundFile;
+
+    public OneMerge(SegmentInfos segments, boolean useCompoundFile) {
+      this.segments = segments;
+      this.useCompoundFile = useCompoundFile;
+    }
+
+    public String segString(Directory dir) {
+      StringBuffer b = new StringBuffer();
+      final int numSegments = segments.size();
+      for(int i=0;i
+   * Get the current setting of whether newly flushed
+   * segments will use the compound file format.  Note that
+   * this just returns the value previously set with
+   * setUseCompoundFile(boolean), or the default value
+   * (true).  You cannot use this to query the status of
+   * previously flushed segments.
+   *
+   * Note that this method is a convenience method: it
+   * just calls mergePolicy.getUseCompoundFile as long as
+   * mergePolicy is an instance of {@link LogMergePolicy}.
+   * Otherwise an IllegalArgumentException is thrown.
+   *
   * @see #setUseCompoundFile(boolean)
   */
  public boolean getUseCompoundFile() {
-    ensureOpen();
-    return useCompoundFile;
+    return getLogMergePolicy().getUseCompoundFile();
  }

-  /** Setting to turn on usage of a compound file. When on, multiple files
-   *  for each segment are merged into a single file once the segment creation
-   *  is finished. This is done regardless of what directory is in use.
+  /** Setting to turn on usage of a compound file. When on,
+   *  multiple files for each segment are merged into a
+   *  single file when a new segment is flushed.
+   *
+   *  Note that this method is a convenience method: it
+   *  just calls mergePolicy.setUseCompoundFile as long as
+   *  mergePolicy is an instance of {@link LogMergePolicy}.
+   *  Otherwise an IllegalArgumentException is thrown.
   */
  public void setUseCompoundFile(boolean value) {
-    ensureOpen();
-    useCompoundFile = value;
+    getLogMergePolicy().setUseCompoundFile(value);
  }

  /** Expert: Set the Similarity implementation used by this IndexWriter.
@@ -652,26 +676,67 @@
    }
  }

+  private MergePolicy mergePolicy = new LogDocMergePolicy();
+  private boolean doMergeClose;
+
+  /**
+   * Set the merge policy used by this IndexWriter
+ */
+ public void setMergePolicy(MergePolicy mp, boolean doClose) {
+ ensureOpen();
+ if (mergePolicy != null && mergePolicy != mp && doMergeClose) {
+ mergePolicy.close();
+ }
+ mergePolicy = mp;
+ doMergeClose = doClose;
+
+ // nocommit
+ //((LogDocMergePolicy) mp).setMinMergeDocs(docWriter.getMaxBufferedDocs());
+ //System.out.println("SET min " + docWriter.getMaxBufferedDocs());
+ }
+
+ public void setMergePolicy(MergePolicy mp) {
+ setMergePolicy(mp, true);
+ }
+
+ /**
+ * Returns the current MergePolicy in use by this writer.
+ * @see #setMergePolicy
+ */
+ public MergePolicy getMergePolicy() {
+ ensureOpen();
+ return mergePolicy;
+ }
+
/** Determines the largest number of documents ever merged by addDocument().
* Small values (e.g., less than 10,000) are best for interactive indexing,
* as this limits the length of pauses while indexing to a few seconds.
* Larger values are best for batched indexing and speedier searches.
*
   * The default value is {@link Integer#MAX_VALUE}.
+   *
+   * Note that this method is a convenience method: it
+   * just calls mergePolicy.setMaxMergeDocs as long as
+   * mergePolicy is an instance of {@link LogMergePolicy}.
+   * Otherwise an IllegalArgumentException is thrown.
   */
  public void setMaxMergeDocs(int maxMergeDocs) {
-    ensureOpen();
-    this.maxMergeDocs = maxMergeDocs;
+    getLogMergePolicy().setMaxMergeDocs(maxMergeDocs);
  }

-  /**
+  /**
   * Returns the largest number of documents allowed in a
   * single segment.
+   *
+   * Note that this method is a convenience method: it
+   * just calls mergePolicy.getMaxMergeDocs as long as
+   * mergePolicy is an instance of {@link LogMergePolicy}.
+   * Otherwise an IllegalArgumentException is thrown.
+   *
   * @see #setMaxMergeDocs
   */
  public int getMaxMergeDocs() {
-    ensureOpen();
-    return maxMergeDocs;
+    return getLogMergePolicy().getMaxMergeDocs();
  }

  /**
@@ -723,6 +788,9 @@
    if (maxBufferedDocs < 2)
      throw new IllegalArgumentException("maxBufferedDocs must at least be 2");
    docWriter.setMaxBufferedDocs(maxBufferedDocs);
+    // nocommit
+    //((LogDocMergePolicy) mergePolicy).setMinMergeDocs(docWriter.getMaxBufferedDocs());
+    //System.out.println("SET min " + docWriter.getMaxBufferedDocs());
  }

  /**
@@ -796,24 +864,31 @@
   * for batch index creation, and smaller values (< 10) for indices that are
   * interactively maintained.
   *
+   * Note that this method is a convenience method: it
+   * just calls mergePolicy.setMergeFactor as long as
+   * mergePolicy is an instance of {@link LogMergePolicy}.
+   * Otherwise an IllegalArgumentException is thrown.
+   *
   * This must never be less than 2.  The default value is 10.
   */
  public void setMergeFactor(int mergeFactor) {
-    ensureOpen();
-    if (mergeFactor < 2)
-      throw new IllegalArgumentException("mergeFactor cannot be less than 2");
-    this.mergeFactor = mergeFactor;
+    getLogMergePolicy().setMergeFactor(mergeFactor);
  }

  /**
-   * Returns the number of segments that are merged at once
-   * and also controls the total number of segments allowed
-   * to accumulate in the index.
+   * Returns the number of segments that are merged at
+   * once and also controls the total number of segments
+   * allowed to accumulate in the index.
+   *
+   * Note that this method is a convenience method: it
+   * just calls mergePolicy.getMergeFactor as long as
+   * mergePolicy is an instance of {@link LogMergePolicy}.
+   * Otherwise an IllegalArgumentException is thrown.
+   *
   * @see #setMergeFactor
   */
  public int getMergeFactor() {
-    ensureOpen();
-    return mergeFactor;
+    return getLogMergePolicy().getMergeFactor();
  }

  /** If non-null, this will be the default infoStream used
@@ -922,37 +997,69 @@
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
-  public synchronized void close() throws CorruptIndexException, IOException {
-    if (!closed) {
-      flush(true, true);
+  public void close() throws CorruptIndexException, IOException {

-      if (commitPending) {
-        segmentInfos.write(directory);         // now commit changes
-        if (infoStream != null)
-          infoStream.println("close: wrote segments file \"" + segmentInfos.getCurrentSegmentFileName() + "\"");
-        deleter.checkpoint(segmentInfos, true);
-        commitPending = false;
-        rollbackSegmentInfos = null;
-      }
+    boolean doClose;
+    synchronized(this) {
+      // Ensure that only one thread actually gets to do the closing:
+      if (!closing) {
+        doClose = true;
+        closing = true;
+      } else
+        doClose = false;
+    }

-      if (writeLock != null) {
-        writeLock.release();                          // release write lock
-        writeLock = null;
+    if (doClose) {
+      try {
+        flush(true, true);
+
+        if (commitPending) {
+          segmentInfos.write(directory);         // now commit changes
+          if (infoStream != null)
+            infoStream.println("close: wrote segments file \"" + segmentInfos.getCurrentSegmentFileName() + "\"");
+          synchronized(this) {
+            deleter.checkpoint(segmentInfos, true);
+          }
+          commitPending = false;
+          rollbackSegmentInfos = null;
+        }
+
+        if (mergePolicy != null) {
+          if (doMergeClose)
+            mergePolicy.close();
+          mergePolicy = null;
+        }
+
+        if (writeLock != null) {
+          writeLock.release();                          // release write lock
+          writeLock = null;
+        }
+        closed = true;
+        docWriter = null;
+
+        synchronized(this) {
+          deleter.close();
+        }
+
+        if(closeDir)
+          directory.close();
+      } finally {
+        if (!closed)
+          closing = false;
      }
-      closed = true;
-      docWriter = null;
-
-      if(closeDir)
-        directory.close();
    }
  }

  /** Tells the docWriter to close its currently open shared
-   *  doc stores (stored fields & vectors files). */
-  private void flushDocStores() throws IOException {
+   *  doc stores (stored fields & vectors files).
+   *  Return value specifies whether new doc store files are compound or not.
+   */
+  private synchronized boolean flushDocStores() throws IOException {

    List files = docWriter.files();

+    boolean useCompoundDocStore = false;
+
    if (files.size() > 0) {
      String docStoreSegment;
@@ -965,7 +1072,9 @@
        docWriter.abort();
      }

-      if (useCompoundFile && docStoreSegment != null) {
+      useCompoundDocStore = mergePolicy.useCompoundDocStore(segmentInfos);
+
+      if (useCompoundDocStore && docStoreSegment != null) {
        // Now build compound doc store file

        checkpoint();
@@ -1006,6 +1115,8 @@
        deleter.checkpoint(segmentInfos, false);
      }
    }
+
+    return useCompoundDocStore;
  }

  /** Release the write lock, if needed. */
@@ -1079,17 +1190,13 @@
   * free temporary space in the Directory to do the
   * merging.
   *
-   * The amount of free space required when a merge is
-   * triggered is up to 1X the size of all segments being
-   * merged, when no readers/searchers are open against the
-   * index, and up to 2X the size of all segments being
-   * merged when readers/searchers are open against the
-   * index (see {@link #optimize()} for details).  Most
-   * merges are small (merging the smallest segments
-   * together), but whenever a full merge occurs (all
-   * segments in the index, which is the worst case for
-   * temporary space usage) then the maximum free disk space
-   * required is the same as {@link #optimize}.
+   * The amount of free space required when a merge is triggered is
+   * up to 1X the size of all segments being merged, when no
+   * readers/searchers are open against the index, and up to 2X the
+   * size of all segments being merged when readers/searchers are open
+   * against the index (see {@link #optimize()} for details).  The
+   * sequence of primitive merge operations performed is governed by
+   * the merge policy.
   *
   * Note that each term in the document can be no longer
   * than 16383 characters, otherwise an
@@ -1121,6 +1228,8 @@
    try {
      success = docWriter.addDocument(doc, analyzer);
    } catch (IOException ioe) {
+      bufferedDeleteTerms.clear();
+      numBufferedDeleteTerms = 0;
      deleter.refresh();
      throw ioe;
    }
@@ -1134,9 +1243,11 @@
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
-  public synchronized void deleteDocuments(Term term) throws CorruptIndexException, IOException {
+  public void deleteDocuments(Term term) throws CorruptIndexException, IOException {
    ensureOpen();
-    bufferDeleteTerm(term);
+    synchronized(this) {
+      bufferDeleteTerm(term);
+    }
    maybeFlush();
  }

@@ -1148,10 +1259,12 @@
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
-  public synchronized void deleteDocuments(Term[] terms) throws CorruptIndexException, IOException {
+  public void deleteDocuments(Term[] terms) throws CorruptIndexException, IOException {
    ensureOpen();
-    for (int i = 0; i < terms.length; i++) {
-      bufferDeleteTerm(terms[i]);
+    synchronized(this) {
+      for (int i = 0; i < terms.length; i++) {
+        bufferDeleteTerm(terms[i]);
+      }
    }
    maybeFlush();
  }

@@ -1189,13 +1302,15 @@
  public void updateDocument(Term term, Document doc, Analyzer analyzer)
      throws CorruptIndexException, IOException {
    ensureOpen();
-    synchronized (this) {
+    synchronized (bufferedDeleteTerms) {
      bufferDeleteTerm(term);
    }
    boolean success = false;
    try {
      success = docWriter.addDocument(doc, analyzer);
    } catch (IOException ioe) {
+      bufferedDeleteTerms.clear();
+      numBufferedDeleteTerms = 0;
      deleter.refresh();
      throw ioe;
    }
@@ -1228,51 +1343,33 @@
    return "_" + Integer.toString(segmentInfos.counter++, Character.MAX_RADIX);
  }

-  /** Determines how often segment indices are merged by addDocument().  With
-   * smaller values, less RAM is used while indexing, and searches on
-   * unoptimized indices are faster, but indexing speed is slower.  With larger
-   * values, more RAM is used during indexing, and while searches on unoptimized
-   * indices are slower, indexing is faster.  Thus larger values (> 10) are best
-   * for batch index creation, and smaller values (< 10) for indices that are
-   * interactively maintained.
-   *
-   * This must never be less than 2.  The default value is {@link #DEFAULT_MERGE_FACTOR}.
-
-   */
-  private int mergeFactor = DEFAULT_MERGE_FACTOR;
-
  /** Determines amount of RAM usage by the buffered docs at
   * which point we trigger a flush to the index. */
  private double ramBufferSize = DEFAULT_RAM_BUFFER_SIZE_MB*1024F*1024F;

-  /** Determines the largest number of documents ever merged by addDocument().
-   * Small values (e.g., less than 10,000) are best for interactive indexing,
-   * as this limits the length of pauses while indexing to a few seconds.
-   * Larger values are best for batched indexing and speedier searches.
-   *
-   * The default value is {@link #DEFAULT_MAX_MERGE_DOCS}.
-
-   */
-  private int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS;
-
  /** If non-null, information about merges will be printed to this. */
  private PrintStream infoStream = null;
-  private static PrintStream defaultInfoStream = null;

-  /** Merges all segments together into a single segment,
-   * optimizing an index for search.
+  /**
+   * Requests an "optimize" operation on an index, priming the index
+   * for the fastest available search.  Traditionally this has meant
+   * merging all segments into a single segment as is done in the
+   * default merge policy, but individual merge policies may implement
+   * optimize in different ways.
+   *
+   * @see LogDocMergePolicy#optimize(SegmentInfos)
+   *
   *
   * It is recommended that this method be called upon completion of indexing.  In
   * environments with frequent updates, optimize is best done during low volume times, if at all.
   *
   * See http://www.gossamer-threads.com/lists/lucene/java-dev/47895 for more discussion.
   *
-   * Note that this requires substantial temporary free
+   * Note that this can require substantial temporary free
   * space in the Directory (see LUCENE-764
   * for details):
@@ -1310,7 +1407,7 @@
   * The actual temporary usage could be much less than
   * these figures (it depends on many factors).
   *
-   * Once the optimize completes, the total size of the
+   * In general, once the optimize completes, the total size of the
   * index will be less than the size of the starting index.
   * It could be quite a bit smaller (if there were many
   * pending deletes) or just slightly smaller.
@@ -1330,18 +1427,60 @@
  public synchronized void optimize() throws CorruptIndexException, IOException {
    ensureOpen();
    flush();
-    while (segmentInfos.size() > 1 ||
-           (segmentInfos.size() == 1 &&
-            (SegmentReader.hasDeletions(segmentInfos.info(0)) ||
-             SegmentReader.hasSeparateNorms(segmentInfos.info(0)) ||
-             segmentInfos.info(0).dir != directory ||
-             (useCompoundFile &&
-              !segmentInfos.info(0).getUseCompoundFile())))) {
-      int minSegment = segmentInfos.size() - mergeFactor;
-      mergeSegments(minSegment < 0 ? 0 : minSegment, segmentInfos.size());
+
+    // Currently hardwired to 1, but once we add method to
+    // IndexWriter to allow "optimizing to <= N segments"
+    // then we will change this.
+    final int maxSegmentCount = 1;
+
+    // Repeat until merge policy stops returning merges:
+    while(true) {
+      MergePolicy.MergeSpecification spec;
+      synchronized(this) {
+        spec = mergePolicy.optimize(segmentInfos, this, maxSegmentCount);
+      }
+      if (spec != null) {
+        final int numMerge = spec.merges.size();
+        for(int i=0;i

   * The provided IndexReaders are not closed.
@@ -1785,7 +1855,7 @@
      }
    }

-    if (useCompoundFile) {
+    if (mergePolicy instanceof LogMergePolicy && getUseCompoundFile()) {

      boolean success = false;

@@ -1804,40 +1874,6 @@
    }
  }

-  // Overview of merge policy:
-  //
-  // A flush is triggered either by close() or by the number of ram segments
-  // reaching maxBufferedDocs. After a disk segment is created by the flush,
-  // further merges may be triggered.
-  //
-  // LowerBound and upperBound set the limits on the doc count of a segment
-  // which may be merged. Initially, lowerBound is set to 0 and upperBound
-  // to maxBufferedDocs. Starting from the rightmost* segment whose doc count
-  // > lowerBound and <= upperBound, count the number of consecutive segments
-  // whose doc count <= upperBound.
-  //
-  // Case 1: number of worthy segments < mergeFactor, no merge, done.
-  // Case 2: number of worthy segments == mergeFactor, merge these segments.
-  //         If the doc count of the merged segment <= upperBound, done.
-  //         Otherwise, set lowerBound to upperBound, and multiply upperBound
-  //         by mergeFactor, go through the process again.
-  // Case 3: number of worthy segments > mergeFactor (in the case mergeFactor
-  //         M changes), merge the leftmost* M segments. If the doc count of
-  //         the merged segment <= upperBound, consider the merged segment for
-  //         further merges on this same level. Merge the now leftmost* M
-  //         segments, and so on, until number of worthy segments < mergeFactor.
-  //         If the doc count of all the merged segments <= upperBound, done.
-  //         Otherwise, set lowerBound to upperBound, and multiply upperBound
-  //         by mergeFactor, go through the process again.
-  // Note that case 2 can be considerd as a special case of case 3.
-  //
-  // This merge policy guarantees two invariants if M does not change and
-  // segment doc count is not reaching maxMergeDocs:
-  // B for maxBufferedDocs, f(n) defined as ceil(log_M(ceil(n/B)))
-  // 1: If i (left*) and i+1 (right*) are two consecutive segments of doc
-  //    counts x and y, then f(x) >= f(y).
-  // 2: The number of committed segments on the same level (f(n)) <= M.
-
  // This is called after pending added and deleted
  // documents have been flushed to the Directory but before
  // the change is committed (new segments_N file written).
@@ -1850,12 +1886,16 @@
   * buffered added documents or buffered deleted terms are
   * large enough.
   */
-  protected final synchronized void maybeFlush() throws CorruptIndexException, IOException {
+  protected final void maybeFlush() throws CorruptIndexException, IOException {
    // We only check for flush due to number of buffered
    // delete terms, because triggering of a flush due to
    // too many added documents is handled by
    // DocumentsWriter
-    if (numBufferedDeleteTerms >= maxBufferedDeleteTerms && docWriter.setFlushPending())
+    boolean doFlush;
+    synchronized(this) {
+      doFlush = numBufferedDeleteTerms >= maxBufferedDeleteTerms && docWriter.setFlushPending();
+    }
+    if (doFlush)
      flush(true, false);
  }

@@ -1867,7 +1907,7 @@
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
-  public final synchronized void flush() throws CorruptIndexException, IOException {
+  public final void flush() throws CorruptIndexException, IOException {
    flush(true, false);
  }

@@ -1879,9 +1919,14 @@
   * @param flushDocStores if false we are allowed to keep
   * doc stores open to share with the next segment
   */
-  protected final synchronized void flush(boolean triggerMerge, boolean flushDocStores) throws CorruptIndexException, IOException {
+  protected final void flush(boolean triggerMerge, boolean flushDocStores) throws CorruptIndexException, IOException {
    ensureOpen();
+    if (doFlush(flushDocStores) && triggerMerge)
+      maybeMerge();
+  }

+  private synchronized final boolean doFlush(boolean flushDocStores) throws CorruptIndexException, IOException {
+
    // Make sure no threads are actively adding a document
    docWriter.pauseAllThreads();

@@ -1911,10 +1956,14 @@
      boolean flushDeletes = bufferedDeleteTerms.size() > 0;

      if (infoStream != null)
-        infoStream.println("  flush: flushDocs=" + flushDocs +
+        infoStream.println("  flush: segment=" + docWriter.getSegment() +
+                           " docStoreSegment=" + docWriter.getDocStoreSegment() +
+                           " docStoreOffset=" + docWriter.getDocStoreOffset() +
+                           " flushDocs=" + flushDocs +
                           " flushDeletes=" + flushDeletes +
                           " flushDocStores=" + flushDocStores +
-                           " numDocs=" + numDocs);
+                           " numDocs=" + numDocs +
+                           " numBufDelTerms=" + numBufferedDeleteTerms);

      int docStoreOffset = docWriter.getDocStoreOffset();
      boolean docStoreIsCompoundFile = false;
@@ -1927,13 +1976,15 @@
        if (infoStream != null)
          infoStream.println("  flush shared docStore segment " + docStoreSegment);

-        flushDocStores();
+        docStoreIsCompoundFile = flushDocStores();
        flushDocStores = false;
-        docStoreIsCompoundFile = useCompoundFile;
      }

      String segment = docWriter.getSegment();

+      // If we are flushing docs, segment must not be null:
+      assert segment != null || !flushDocs;
+
      if (flushDocs || flushDeletes) {

        SegmentInfos rollback = null;
@@ -2013,7 +2064,8 @@

          deleter.checkpoint(segmentInfos, autoCommit);

-          if (flushDocs && useCompoundFile) {
+          if (flushDocs && mergePolicy.useCompoundFile(segmentInfos,
+                                                       newSegment)) {
            success = false;
            try {
              docWriter.createCompoundFile(segment);
@@ -2029,16 +2081,10 @@

          deleter.checkpoint(segmentInfos, autoCommit);
        }
+        return true;
+      } else
+        return false;

-        /* new merge policy
-        if (0 == docWriter.getMaxBufferedDocs())
-          maybeMergeSegments(mergeFactor * numDocs / 2);
-        else
-          maybeMergeSegments(docWriter.getMaxBufferedDocs());
-        */
-        if (triggerMerge)
-          maybeMergeSegments(docWriter.getMaxBufferedDocs());
-      }
    } finally {
      docWriter.clearFlushPending();
      docWriter.resumeAllThreads();
@@ -2060,256 +2106,477 @@
    ensureOpen();
    return docWriter.getNumDocsInRAM();
  }
-
-  /** Incremental segment merger. */
-  private final void maybeMergeSegments(int startUpperBound) throws CorruptIndexException, IOException {
-    long lowerBound = -1;
-    long upperBound = startUpperBound;

-    /* new merge policy
-    if (upperBound == 0) upperBound = 10;
-    */
+  private int ensureContiguousMerge(MergePolicy.OneMerge merge) {

-    while (upperBound < maxMergeDocs) {
-      int minSegment = segmentInfos.size();
-      int maxSegment = -1;
+    int first = segmentInfos.indexOf(merge.segments.info(0));
+    if (first == -1)
+      throw new MergePolicy.MergeException("could not find segment " + merge.segments.info(0).name + " in current segments");

-      // find merge-worthy segments
-      while (--minSegment >= 0) {
-        SegmentInfo si = segmentInfos.info(minSegment);
+    final int numSegments = segmentInfos.size();
+
+    final int numSegmentsToMerge = merge.segments.size();
+    for(int i=0;i