Index: core/src/java/org/apache/lucene/index/LogMergePolicy.java
===================================================================
--- core/src/java/org/apache/lucene/index/LogMergePolicy.java (revision 1376393)
+++ core/src/java/org/apache/lucene/index/LogMergePolicy.java (working copy)
@@ -64,6 +64,13 @@
    * @see #setNoCFSRatio */
   public static final double DEFAULT_NO_CFS_RATIO = 0.1;
 
+  /** Default maxCFSSegmentSize value allows compound file
+   * for a segment of any size. The actual file format is
+   * still subject to noCFSRatio.
+   * @see #setMaxCFSSegmentSizeMB(double)
+   */
+  public static final long DEFAULT_MAX_CFS_SEGMENT_SIZE = Long.MAX_VALUE;
+
   protected int mergeFactor = DEFAULT_MERGE_FACTOR;
 
   protected long minMergeSize;
@@ -74,6 +81,7 @@
   protected int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS;
 
   protected double noCFSRatio = DEFAULT_NO_CFS_RATIO;
+  protected long maxCFSSegmentSize = DEFAULT_MAX_CFS_SEGMENT_SIZE;
 
   protected boolean calibrateSizeByDeletes = true;
 
@@ -136,21 +144,21 @@
   // Javadoc inherited
   @Override
   public boolean useCompoundFile(SegmentInfos infos, SegmentInfoPerCommit mergedInfo) throws IOException {
-    final boolean doCFS;
-
-    if (!useCompoundFile) {
-      doCFS = false;
-    } else if (noCFSRatio == 1.0) {
-      doCFS = true;
-    } else {
-      long totalSize = 0;
-      for (SegmentInfoPerCommit info : infos) {
-        totalSize += size(info);
-      }
-
-      doCFS = size(mergedInfo) <= noCFSRatio * totalSize;
+    if (!getUseCompoundFile()) {
+      return false;
     }
-    return doCFS;
+    long mergedInfoSize = size(mergedInfo);
+    if (mergedInfoSize > maxCFSSegmentSize) {
+      return false;
+    }
+    if (getNoCFSRatio() >= 1.0) {
+      return true;
+    }
+    long totalSize = 0;
+    for (SegmentInfoPerCommit info : infos) {
+      totalSize += size(info);
+    }
+    return mergedInfoSize <= getNoCFSRatio() * totalSize;
   }
 
   /** Sets whether compound file format should be used for
@@ -674,9 +682,28 @@
     sb.append("calibrateSizeByDeletes=").append(calibrateSizeByDeletes).append(", ");
     sb.append("maxMergeDocs=").append(maxMergeDocs).append(", ");
     sb.append("useCompoundFile=").append(useCompoundFile).append(", ");
+    sb.append("maxCFSSegmentSizeMB=").append(getMaxCFSSegmentSizeMB()).append(", ");
     sb.append("noCFSRatio=").append(noCFSRatio);
     sb.append("]");
     return sb.toString();
   }
-  
+
+  /** Returns the largest size allowed for a compound file segment */
+  public final double getMaxCFSSegmentSizeMB() {
+    return maxCFSSegmentSize/1024./1024.;
+  }
+
+  /** If a merged segment will be more than this value,
+   * leave the segment as
+   * non-compound file even if compound file is enabled.
+   * Set this to Double.POSITIVE_INFINITY (default) and noCFSRatio to 1.0
+   * to always use CFS regardless of merge size. */
+  public final void setMaxCFSSegmentSizeMB(double v) {
+    if (Double.isNaN(v) || v < 0.0) {
+      throw new IllegalArgumentException("maxCFSSegmentSizeMB must be >= 0 (got " + v + ")");
+    }
+    // double -> long narrowing saturates, so POSITIVE_INFINITY becomes Long.MAX_VALUE (no limit)
+    this.maxCFSSegmentSize = (long) (v*1024*1024);
+  }
+
 }
Index: core/src/java/org/apache/lucene/index/TieredMergePolicy.java
===================================================================
--- core/src/java/org/apache/lucene/index/TieredMergePolicy.java (revision 1376393)
+++ core/src/java/org/apache/lucene/index/TieredMergePolicy.java (working copy)
@@ -84,6 +84,7 @@
   private double forceMergeDeletesPctAllowed = 10.0;
   private boolean useCompoundFile = true;
   private double noCFSRatio = 0.1;
+  private long maxCFSSegmentSize = Long.MAX_VALUE;
   private double reclaimDeletesWeight = 2.0;
 
   /** Maximum number of segments to be merged at a time
@@ -602,21 +603,21 @@
 
   @Override
   public boolean useCompoundFile(SegmentInfos infos, SegmentInfoPerCommit mergedInfo) throws IOException {
-    final boolean doCFS;
-
-    if (!useCompoundFile) {
-      doCFS = false;
-    } else if (noCFSRatio == 1.0) {
-      doCFS = true;
-    } else {
-      long totalSize = 0;
-      for (SegmentInfoPerCommit info : infos) {
+    if (!getUseCompoundFile()) {
+      return false;
+    }
+    long mergedInfoSize = size(mergedInfo);
+    if (mergedInfoSize > maxCFSSegmentSize) {
+      return false;
+    }
+    if (getNoCFSRatio() >= 1.0) {
+      return true;
+    }
+    long totalSize = 0;
+    for (SegmentInfoPerCommit info : infos) {
         totalSize += size(info);
-      }
-
-      doCFS = size(mergedInfo) <= noCFSRatio * totalSize;
     }
-    return doCFS;
+    return mergedInfoSize <= getNoCFSRatio() * totalSize;
   }
 
   @Override
@@ -629,7 +630,7 @@
     boolean hasDeletions = w.numDeletedDocs(info) > 0;
     return !hasDeletions &&
       info.info.dir == w.getDirectory() &&
-      (info.info.getUseCompoundFile() == useCompoundFile || noCFSRatio < 1.0);
+      (info.info.getUseCompoundFile() == useCompoundFile || noCFSRatio < 1.0 || maxCFSSegmentSize < Long.MAX_VALUE);
   }
 
   // Segment size in bytes, pro-rated by % deleted
@@ -664,7 +665,26 @@
     sb.append("forceMergeDeletesPctAllowed=").append(forceMergeDeletesPctAllowed).append(", ");
     sb.append("segmentsPerTier=").append(segsPerTier).append(", ");
     sb.append("useCompoundFile=").append(useCompoundFile).append(", ");
+    sb.append("maxCFSSegmentSizeMB=").append(getMaxCFSSegmentSizeMB()).append(", ");
     sb.append("noCFSRatio=").append(noCFSRatio);
     return sb.toString();
   }
+
+  /** Returns the largest size allowed for a compound file segment */
+  public final double getMaxCFSSegmentSizeMB() {
+    return maxCFSSegmentSize/1024./1024.;
+  }
+
+  /** If a merged segment will be more than this value,
+   * leave the segment as
+   * non-compound file even if compound file is enabled.
+   * Set this to Double.POSITIVE_INFINITY (default) and noCFSRatio to 1.0
+   * to always use CFS regardless of merge size. */
+  public final void setMaxCFSSegmentSizeMB(double v) {
+    if (Double.isNaN(v) || v < 0.0) {
+      throw new IllegalArgumentException("maxCFSSegmentSizeMB must be >= 0 (got " + v + ")");
+    }
+    // double -> long narrowing saturates, so POSITIVE_INFINITY becomes Long.MAX_VALUE (no limit)
+    this.maxCFSSegmentSize = (long) (v*1024*1024);
+  }
 }