Index: lucene/CHANGES.txt =================================================================== --- lucene/CHANGES.txt (revision 1059563) +++ lucene/CHANGES.txt (working copy) @@ -113,9 +113,10 @@ usage, allowing applications to accidentally open two writers on the same directory. (Mike McCandless) -* LUCENE-2701: maxMergeMB and maxMergeDocs constraints set on LogMergePolicy now - affect optimize() as well (as opposed to only regular merges). This means that - you can run optimize() and too large segments won't be merged. (Shai Erera) +* LUCENE-2701: maxMergeMBForOptimize and maxMergeDocs constraints set on + LogMergePolicy now affect optimize() as well (as opposed to only regular + merges). This means that you can run optimize() and too large segments won't + be merged. (Shai Erera) * LUCENE-2753: IndexReader and DirectoryReader .listCommits() now return a List, guaranteeing the commits are sorted from oldest to latest. (Shai Erera) Index: lucene/src/test/org/apache/lucene/index/TestSizeBoundedOptimize.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestSizeBoundedOptimize.java (revision 1059563) +++ lucene/src/test/org/apache/lucene/index/TestSizeBoundedOptimize.java (working copy) @@ -63,7 +63,7 @@ conf = newWriterConfig(); LogByteSizeMergePolicy lmp = new LogByteSizeMergePolicy(); - lmp.setMaxMergeMB((min + 1) / (1 << 20)); + lmp.setMaxMergeMBForOptimize((min + 1) / (1 << 20)); conf.setMergePolicy(lmp); writer = new IndexWriter(dir, conf); Index: lucene/src/java/org/apache/lucene/index/LogDocMergePolicy.java =================================================================== --- lucene/src/java/org/apache/lucene/index/LogDocMergePolicy.java (revision 1059563) +++ lucene/src/java/org/apache/lucene/index/LogDocMergePolicy.java (working copy) @@ -31,9 +31,10 @@ public LogDocMergePolicy() { minMergeSize = DEFAULT_MIN_MERGE_DOCS; - // maxMergeSize is never used by LogDocMergePolicy; set + // 
maxMergeSize(ForOptimize) are never used by LogDocMergePolicy; set // it to Long.MAX_VALUE to disable it maxMergeSize = Long.MAX_VALUE; + maxMergeSizeForOptimize = Long.MAX_VALUE; } @Override Index: lucene/src/java/org/apache/lucene/index/LogMergePolicy.java =================================================================== --- lucene/src/java/org/apache/lucene/index/LogMergePolicy.java (revision 1059563) +++ lucene/src/java/org/apache/lucene/index/LogMergePolicy.java (working copy) @@ -63,6 +63,9 @@ protected long minMergeSize; protected long maxMergeSize; + // Although the core MPs set it explicitly, we must default in case someone + // out there wrote his own LMP ... + protected long maxMergeSizeForOptimize = Long.MAX_VALUE; protected int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS; protected double noCFSRatio = DEFAULT_NO_CFS_RATIO; @@ -240,9 +243,9 @@ int start = last - 1; while (start >= 0) { SegmentInfo info = infos.info(start); - if (size(info) > maxMergeSize || sizeDocs(info) > maxMergeDocs) { + if (size(info) > maxMergeSizeForOptimize || sizeDocs(info) > maxMergeDocs) { if (verbose()) { - message("optimize: skip segment=" + info + ": size is > maxMergeSize (" + maxMergeSize + ") or sizeDocs is > maxMergeDocs (" + maxMergeDocs + ")"); + message("optimize: skip segment=" + info + ": size is > maxMergeSizeForOptimize (" + maxMergeSizeForOptimize + ") or sizeDocs is > maxMergeDocs (" + maxMergeDocs + ")"); } // need to skip that segment + add a merge for the 'right' segments, // unless there is only 1 which is optimized. @@ -326,9 +329,12 @@ } /** Returns the merges necessary to optimize the index. - * This merge policy defines "optimized" to mean only one - * segment in the index, where that segment has no - * deletions pending nor separate norms, and it is in + * This merge policy defines "optimized" to mean only the + * requested number of segments is left in the index, and + * respects the {@link #maxMergeSizeForOptimize} setting. 
+ * By default, and assuming {@code maxNumSegments=1}, only + * one segment will be left in the index, where that segment + * has no deletions pending nor separate norms, and it is in * compound file format if the current useCompoundFile * setting is true. This method returns multiple merges * (mergeFactor at a time) so the {@link MergeScheduler} @@ -364,7 +370,7 @@ boolean anyTooLarge = false; for (int i = 0; i < last; i++) { SegmentInfo info = infos.info(i); - if (size(info) > maxMergeSize || sizeDocs(info) > maxMergeDocs) { + if (size(info) > maxMergeSizeForOptimize || sizeDocs(info) > maxMergeDocs) { anyTooLarge = true; break; } @@ -570,6 +576,7 @@ sb.append("minMergeSize=").append(minMergeSize).append(", "); sb.append("mergeFactor=").append(mergeFactor).append(", "); sb.append("maxMergeSize=").append(maxMergeSize).append(", "); + sb.append("maxMergeSizeForOptimize=").append(maxMergeSizeForOptimize).append(", "); sb.append("calibrateSizeByDeletes=").append(calibrateSizeByDeletes).append(", "); sb.append("maxMergeDocs=").append(maxMergeDocs).append(", "); sb.append("useCompoundFile=").append(useCompoundFile); Index: lucene/src/java/org/apache/lucene/index/LogByteSizeMergePolicy.java =================================================================== --- lucene/src/java/org/apache/lucene/index/LogByteSizeMergePolicy.java (revision 1059563) +++ lucene/src/java/org/apache/lucene/index/LogByteSizeMergePolicy.java (working copy) @@ -30,9 +30,14 @@ * or larger will never be merged. @see setMaxMergeMB */ public static final double DEFAULT_MAX_MERGE_MB = 2048; + /** Default maximum segment size. A segment of this size + * or larger will never be merged during optimize. 
@see #setMaxMergeMBForOptimize */ + public static final double DEFAULT_MAX_MERGE_MB_FOR_OPTIMIZE = Long.MAX_VALUE; + + public LogByteSizeMergePolicy() { minMergeSize = (long) (DEFAULT_MIN_MERGE_MB*1024*1024); maxMergeSize = (long) (DEFAULT_MAX_MERGE_MB*1024*1024); + maxMergeSizeForOptimize = (long) (DEFAULT_MAX_MERGE_MB_FOR_OPTIMIZE*1024*1024); } @Override @@ -63,6 +68,23 @@ return ((double) maxMergeSize)/1024/1024; } + /**
Determines the largest segment (measured by total + * byte size of the segment's files, in MB) that may be + * merged with other segments during optimize. Setting + * it low will leave the index with more than 1 segment, + * even if {@link IndexWriter#optimize()} is called.*/ + public void setMaxMergeMBForOptimize(double mb) { + maxMergeSizeForOptimize = (long) (mb*1024*1024); + } + + /** Returns the largest segment (measured by total byte + * size of the segment's files, in MB) that may be merged + * with other segments during optimize. + * @see #setMaxMergeMBForOptimize */ + public double getMaxMergeMBForOptimize() { + return ((double) maxMergeSizeForOptimize)/1024/1024; + } + /** Sets the minimum size for the lowest level segments. * Any segments below this size are considered to be on * the same level (even if they vary drastically in size)