Index: lucene/src/java/org/apache/lucene/index/MergePolicy.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/MergePolicy.java	(date 1291292814000)
+++ lucene/src/java/org/apache/lucene/index/MergePolicy.java	(revision )
@@ -76,16 +76,14 @@
     SegmentReader[] readers;        // used by IndexWriter
     SegmentReader[] readersClone;   // used by IndexWriter
     public final SegmentInfos segments;
-    public final boolean useCompoundFile;
     boolean aborted;
     Throwable error;
     boolean paused;

-    public OneMerge(SegmentInfos segments, boolean useCompoundFile) {
+    public OneMerge(SegmentInfos segments) {
       if (0 == segments.size())
         throw new RuntimeException("segments must include at least one segment");
       this.segments = segments;
-      this.useCompoundFile = useCompoundFile;
     }

     /** Record that an exception occurred while executing
@@ -314,10 +312,9 @@
   public abstract void close();

   /**
-   * Returns true if a newly flushed (not from merge)
-   * segment should use the compound file format.
+   * Returns true if a new segment (regardless of its origin) should use the compound file format.
    */
-  public abstract boolean useCompoundFile(SegmentInfos segments, SegmentInfo newSegment);
+  public abstract boolean useCompoundFile(SegmentInfos segments, SegmentInfo newSegment) throws IOException;

   /**
    * Returns true if the doc store files should use the
Index: lucene/contrib/misc/src/java/org/apache/lucene/index/BalancedSegmentMergePolicy.java
===================================================================
--- lucene/contrib/misc/src/java/org/apache/lucene/index/BalancedSegmentMergePolicy.java	(date 1291292814000)
+++ lucene/contrib/misc/src/java/org/apache/lucene/index/BalancedSegmentMergePolicy.java	(revision )
@@ -132,11 +132,10 @@

       // Since we must optimize down to 1 segment, the
       // choice is simple:
-      boolean useCompoundFile = getUseCompoundFile();
       if (last > 1 || !isOptimized(infos.info(0))) {
         spec = new MergeSpecification();
-        spec.add(new OneMerge(infos.range(0, last), useCompoundFile));
+        spec.add(new OneMerge(infos.range(0, last)));
       }
     } else if (last > maxNumSegments) {
@@ -153,7 +152,6 @@
     if (infoLen <= maxNumSegments) return null;

     MergeSpecification spec = new MergeSpecification();
-    boolean useCompoundFile = getUseCompoundFile();

     // use Viterbi algorithm to find the best segmentation.
     // we will try to minimize the size variance of resulting segments.
@@ -194,7 +192,7 @@
           prev = backLink[i][prev];
           int mergeStart = i + prev;
           if((mergeEnd - mergeStart) > 1) {
-            spec.add(new OneMerge(infos.range(mergeStart, mergeEnd), useCompoundFile));
+            spec.add(new OneMerge(infos.range(mergeStart, mergeEnd)));
           } else {
             if(partialExpunge) {
               SegmentInfo info = infos.info(mergeStart);
@@ -210,7 +208,7 @@

     if(partialExpunge && maxDelCount > 0) {
       // expunge deletes
-      spec.add(new OneMerge(infos.range(expungeCandidate, expungeCandidate + 1), useCompoundFile));
+      spec.add(new OneMerge(infos.range(expungeCandidate, expungeCandidate + 1)));
     }

     return spec;
@@ -260,7 +258,7 @@
     for(int i = 0; i < numLargeSegs; i++) {
       SegmentInfo info = infos.info(i);
       if(info.hasDeletions()) {
-        spec.add(new OneMerge(infos.range(i, i + 1), getUseCompoundFile()));
+        spec.add(new OneMerge(infos.range(i, i + 1)));
       }
     }
     return spec;
@@ -298,7 +296,7 @@
       if(totalSmallSegSize < targetSegSize * 2) {
         MergeSpecification spec = findBalancedMerges(infos, numLargeSegs, (numLargeSegs - 1), _partialExpunge);
         if(spec == null) spec = new MergeSpecification(); // should not happen
-        spec.add(new OneMerge(infos.range(numLargeSegs, numSegs), getUseCompoundFile()));
+        spec.add(new OneMerge(infos.range(numLargeSegs, numSegs)));
         return spec;
       } else {
         return findBalancedMerges(infos, numSegs, numLargeSegs, _partialExpunge);
@@ -313,7 +311,7 @@
           if(size(info) < sizeThreshold) break;
           startSeg++;
         }
-        spec.add(new OneMerge(infos.range(startSeg, numSegs), getUseCompoundFile()));
+        spec.add(new OneMerge(infos.range(startSeg, numSegs)));
         return spec;
       } else {
         // apply the log merge policy to small segments.
@@ -344,7 +342,7 @@
       }
     }
     if (maxDelCount > 0) {
-      return new OneMerge(infos.range(expungeCandidate, expungeCandidate + 1), getUseCompoundFile());
+      return new OneMerge(infos.range(expungeCandidate, expungeCandidate + 1));
     }
     return null;
   }
Index: lucene/src/java/org/apache/lucene/index/LogMergePolicy.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/LogMergePolicy.java	(date 1291292814000)
+++ lucene/src/java/org/apache/lucene/index/LogMergePolicy.java	(revision )
@@ -127,9 +127,22 @@

   // Javadoc inherited
   @Override
-  public boolean useCompoundFile(SegmentInfos infos, SegmentInfo info) {
-    return useCompoundFile;
-  }
+  public boolean useCompoundFile(SegmentInfos infos, SegmentInfo mergedInfo) throws IOException {
+    final boolean doCFS;
+
+    if (!useCompoundFile) {
+      doCFS = false;
+    } else if (noCFSRatio == 1.0) {
+      doCFS = true;
+    } else {
+      long totalSize = 0;
+      for (SegmentInfo info : infos)
+        totalSize += size(info);
+
+      doCFS = size(mergedInfo) <= noCFSRatio * totalSize;
+    }
+    return doCFS;
+  }

   /** Sets whether compound file format should be used for
    *  newly flushed and newly merged segments. */
@@ -254,12 +267,12 @@
         // unless there is only 1 which is optimized.
         if (last - start - 1 > 1 || (start != last - 1 && !isOptimized(infos.info(start + 1)))) {
           // there is more than 1 segment to the right of this one, or an unoptimized single segment.
-          spec.add(makeOneMerge(infos, infos.range(start + 1, last)));
+          spec.add(new OneMerge(infos.range(start + 1, last)));
         }
         last = start;
       } else if (last - start == mergeFactor) {
         // mergeFactor eligible segments were found, add them as a merge.
-        spec.add(makeOneMerge(infos, infos.range(start, last)));
+        spec.add(new OneMerge(infos.range(start, last)));
         last = start;
       }
       --start;
@@ -267,7 +280,7 @@

     // Add any left-over segments, unless there is just 1 already optimized.
     if (last > 0 && (++start + 1 < last || !isOptimized(infos.info(start)))) {
-      spec.add(makeOneMerge(infos, infos.range(start, last)));
+      spec.add(new OneMerge(infos.range(start, last)));
     }

     return spec.merges.size() == 0 ? null : spec;
@@ -284,7 +297,7 @@
       // First, enroll all "full" merges (size
       // mergeFactor) to potentially be run concurrently:
       while (last - maxNumSegments + 1 >= mergeFactor) {
-        spec.add(makeOneMerge(infos, infos.range(last-mergeFactor, last)));
+        spec.add(new OneMerge(infos.range(last - mergeFactor, last)));
         last -= mergeFactor;
       }
@@ -296,7 +309,7 @@
       // Since we must optimize down to 1 segment, the
       // choice is simple:
       if (last > 1 || !isOptimized(infos.info(0))) {
-        spec.add(makeOneMerge(infos, infos.range(0, last)));
+        spec.add(new OneMerge(infos.range(0, last)));
       }
     } else if (last > maxNumSegments) {
@@ -325,7 +338,7 @@
         }
       }

-      spec.add(makeOneMerge(infos, infos.range(bestStart, bestStart+finalMergeSize)));
+      spec.add(new OneMerge(infos.range(bestStart, bestStart + finalMergeSize)));
     }
   }
   return spec.merges.size() == 0 ? null : spec;
@@ -413,7 +426,7 @@
           // deletions, so force a merge now:
           if (verbose())
             message("  add merge " + firstSegmentWithDeletions + " to " + (i-1) + " inclusive");
-          spec.add(makeOneMerge(segmentInfos, segmentInfos.range(firstSegmentWithDeletions, i)));
+          spec.add(new OneMerge(segmentInfos.range(firstSegmentWithDeletions, i)));
           firstSegmentWithDeletions = i;
         }
       } else if (firstSegmentWithDeletions != -1) {
@@ -422,7 +435,7 @@
         // mergeFactor segments
         if (verbose())
           message("  add merge " + firstSegmentWithDeletions + " to " + (i-1) + " inclusive");
-        spec.add(makeOneMerge(segmentInfos, segmentInfos.range(firstSegmentWithDeletions, i)));
+        spec.add(new OneMerge(segmentInfos.range(firstSegmentWithDeletions, i)));
         firstSegmentWithDeletions = -1;
       }
     }
@@ -430,7 +443,7 @@
     if (firstSegmentWithDeletions != -1) {
       if (verbose())
         message("  add merge " + firstSegmentWithDeletions + " to " + (numSegments-1) + " inclusive");
-      spec.add(makeOneMerge(segmentInfos, segmentInfos.range(firstSegmentWithDeletions, numSegments)));
+      spec.add(new OneMerge(segmentInfos.range(firstSegmentWithDeletions, numSegments)));
     }

     return spec;
@@ -530,7 +543,7 @@
             spec = new MergeSpecification();
           if (verbose())
             message("    " + start + " to " + end + ": add this merge");
-          spec.add(makeOneMerge(infos, infos.range(start, end)));
+          spec.add(new OneMerge(infos.range(start, end)));
         } else if (verbose())
           message("    " + start + " to " + end + ": contains segment over maxMergeSize or maxMergeDocs; skipping");
@@ -544,29 +557,6 @@
     return spec;
   }

-  protected OneMerge makeOneMerge(SegmentInfos infos, SegmentInfos infosToMerge) throws IOException {
-    final boolean doCFS;
-    if (!useCompoundFile) {
-      doCFS = false;
-    } else if (noCFSRatio == 1.0) {
-      doCFS = true;
-    } else {
-
-      long totSize = 0;
-      for(SegmentInfo info : infos) {
-        totSize += size(info);
-      }
-      long mergeSize = 0;
-      for(SegmentInfo info : infosToMerge) {
-        mergeSize += size(info);
-      }
-
-      doCFS = mergeSize <= noCFSRatio * totSize;
-    }
-
-    return new OneMerge(infosToMerge, doCFS);
-  }
-
   /** Determines the largest segment (measured by
    *  document count) that may be merged with other segments.
    *  Small values (e.g., less than 10,000) are best for
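
The LogMergePolicy hunks above fold the compound-file decision from the removed makeOneMerge() helper into useCompoundFile() itself. One behavioral nuance worth flagging for review: the old helper compared the summed size of the segments being merged against the total index size, while the new method compares only the size of the single merged segment. Since the decision is now an ordinary override point on MergePolicy, a subclass can layer its own rule on top of the ratio logic. A minimal sketch, assuming this patch is applied; the subclass and its byte cap are hypothetical, not part of the patch:

    import java.io.IOException;

    import org.apache.lucene.index.LogByteSizeMergePolicy;
    import org.apache.lucene.index.SegmentInfo;
    import org.apache.lucene.index.SegmentInfos;

    // Hypothetical policy: never build a compound file for segments over
    // a hard byte cap; otherwise defer to the noCFSRatio logic above.
    public class CappedCompoundFileMergePolicy extends LogByteSizeMergePolicy {
      private final long cfsByteCap;

      public CappedCompoundFileMergePolicy(long cfsByteCap) {
        this.cfsByteCap = cfsByteCap;
      }

      @Override
      public boolean useCompoundFile(SegmentInfos infos, SegmentInfo mergedInfo) throws IOException {
        if (size(mergedInfo) > cfsByteCap) {
          return false; // very large segments stay in non-compound format
        }
        return super.useCompoundFile(infos, mergedInfo);
      }
    }
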
Index: lucene/src/java/org/apache/lucene/index/IndexWriter.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/IndexWriter.java (date 1291292814000)
+++ lucene/src/java/org/apache/lucene/index/IndexWriter.java (revision )
@@ -50,13 +50,13 @@
An IndexWriter creates and maintains an index.
The create argument to the {@link
- #IndexWriter(Directory, Analyzer, boolean, MaxFieldLength) constructor} determines
+ #IndexWriter(Directory, IndexWriterConfig) constructor} determines
whether a new index is created, or whether an existing index is
opened. Note that you can open an index with create=true
even while readers are using the index. The old readers will
continue to search the "point in time" snapshot they had opened,
and won't see the newly created index until they re-open. There are
- also {@link #IndexWriter(Directory, Analyzer, MaxFieldLength) constructors}
+ also {@link #IndexWriter(Directory, IndexWriterConfig) constructors}
with no create argument which will create a new index
if there is not already an index at the provided path and otherwise
open the existing index.
  These changes are buffered in memory and periodically
  flushed to the {@link Directory} (during the above method
  calls).  A flush is triggered when there are enough
- buffered deletes (see {@link #setMaxBufferedDeleteTerms})
+ buffered deletes (see {@link IndexWriterConfig#setMaxBufferedDeleteTerms})
  or enough added documents since the last flush, whichever
  is sooner.  For the added documents, flushing is triggered
  either by RAM usage of the documents (see {@link
- #setRAMBufferSizeMB}) or the number of added documents.
+ IndexWriterConfig#setRAMBufferSizeMB}) or the number of added documents.
  The default is to flush when RAM usage hits 16 MB.  For
  best indexing speed you should flush by RAM usage with a
  large RAM buffer.  Note that flushing just moves the
@@ -1248,8 +1248,8 @@
  /**
   * Adds a document to this index.  If the document contains more than
-  * {@link #setMaxFieldLength(int)} terms for a given field, the remainder are
-  * discarded.
+  * {@link IndexWriterConfig#setMaxFieldLength(int)} terms for a given field,
+  * the remainder are discarded.
   *
   * Note that if an Exception is hit (for example disk full)
   * then the index will be consistent, but this document
@@ -1297,7 +1297,7 @@
  /**
   * Adds a document to this index, using the provided analyzer instead of the
   * value of {@link #getAnalyzer()}.  If the document contains more than
-  * {@link #setMaxFieldLength(int)} terms for a given field, the remainder are
+  * {@link IndexWriterConfig#setMaxFieldLength(int)} terms for a given field, the remainder are
   * discarded.
   *
   * See {@link #addDocument(Document)} for details on
@@ -1603,7 +1603,7 @@
*
* @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error
- * @see LogMergePolicy#findMergesForOptimize
+ * @see MergePolicy#findMergesForOptimize
*/
public void optimize() throws CorruptIndexException, IOException {
optimize(true);
@@ -2309,7 +2309,7 @@
}
// Now create the compound file if needed
- if (mergePolicy instanceof LogMergePolicy && ((LogMergePolicy) mergePolicy).getUseCompoundFile()) {
+ if (mergePolicy.useCompoundFile(segmentInfos, info)) {
List
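
With the IndexWriter change above, the writer consults the merge policy directly instead of special-casing LogMergePolicy, so any MergePolicy implementation can now control compound-file creation for both flushed and merged segments. A hedged usage sketch under the trunk API this patch targets (IndexWriterConfig and LogMergePolicy.setNoCFSRatio; the Version constant, analyzer, and RAMDirectory choices are illustrative only):

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.index.LogByteSizeMergePolicy;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.RAMDirectory;
    import org.apache.lucene.util.Version;

    public class NoCFSRatioExample {
      public static void main(String[] args) throws Exception {
        Directory dir = new RAMDirectory();

        LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy();
        mp.setUseCompoundFile(true);
        // Build a compound file only when the new segment is at most 10%
        // of the total index size; with this patch the check runs through
        // MergePolicy.useCompoundFile() for flushed and merged segments alike.
        mp.setNoCFSRatio(0.1);

        IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_40,
            new StandardAnalyzer(Version.LUCENE_40));
        conf.setMergePolicy(mp);

        IndexWriter writer = new IndexWriter(dir, conf);
        // ... add documents; flushes and merges now ask the policy about CFS ...
        writer.close();
      }
    }
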