Index: src/java/org/apache/lucene/index/LogDocMergePolicy.java =================================================================== --- src/java/org/apache/lucene/index/LogDocMergePolicy.java (revision 773951) +++ src/java/org/apache/lucene/index/LogDocMergePolicy.java (working copy) @@ -1,5 +1,7 @@ package org.apache.lucene.index; +import java.io.IOException; + /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -34,8 +36,8 @@ // it to Long.MAX_VALUE to disable it maxMergeSize = Long.MAX_VALUE; } - protected long size(SegmentInfo info) { - return info.docCount; + protected long size(SegmentInfo info) throws IOException { + return sizeDocs(info); } /** Sets the minimum size for the lowest level segments. Index: src/java/org/apache/lucene/index/LogMergePolicy.java =================================================================== --- src/java/org/apache/lucene/index/LogMergePolicy.java (revision 773951) +++ src/java/org/apache/lucene/index/LogMergePolicy.java (working copy) @@ -59,6 +59,8 @@ long maxMergeSize; int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS; + protected boolean calibrateSizeByDeletes = false; + private boolean useCompoundFile = true; private boolean useCompoundDocStore = true; private IndexWriter writer; @@ -132,10 +134,40 @@ return useCompoundDocStore; } + /** Sets whether the segment size should be calibrated by + * the number of deletes when choosing segments for merge. */ + public void setCalibrateSizeByDeletes(boolean calibrateSizeByDeletes) { + this.calibrateSizeByDeletes = calibrateSizeByDeletes; + } + + /** Returns true if the segment size should be calibrated + * by the number of deletes when choosing segments for merge. 
*/ + public boolean getCalibrateSizeByDeletes() { + return calibrateSizeByDeletes; + } + public void close() {} abstract protected long size(SegmentInfo info) throws IOException; + protected long sizeDocs(SegmentInfo info) throws IOException { + if (calibrateSizeByDeletes) { + return (info.docCount - (long)info.getDelCount()); + } else { + return info.docCount; + } + } + + protected long sizeBytes(SegmentInfo info) throws IOException { + long byteSize = info.sizeInBytes(); + if (calibrateSizeByDeletes) { + float delRatio = (info.docCount <= 0 ? 0.0f : ((float)info.getDelCount() / (float)info.docCount)); + return (info.docCount <= 0 ? byteSize : (long)((float)byteSize * (1.0f - delRatio))); + } else { + return byteSize; + } + } + private boolean isOptimized(SegmentInfos infos, IndexWriter writer, int maxNumSegments, Set segmentsToOptimize) throws IOException { final int numSegments = infos.size(); int numToOptimize = 0; @@ -390,7 +422,7 @@ boolean anyTooLarge = false; for(int i=start;i<end;i++) { final SegmentInfo info = infos.info(i); - anyTooLarge |= (size(info) >= maxMergeSize || info.docCount >= maxMergeDocs); + anyTooLarge |= (size(info) >= maxMergeSize || sizeDocs(info) >= maxMergeDocs); } if (!anyTooLarge) { Index: src/java/org/apache/lucene/index/LogByteSizeMergePolicy.java =================================================================== --- src/java/org/apache/lucene/index/LogByteSizeMergePolicy.java (revision 773951) +++ src/java/org/apache/lucene/index/LogByteSizeMergePolicy.java (working copy) @@ -36,7 +36,7 @@ maxMergeSize = (long) (DEFAULT_MAX_MERGE_MB*1024*1024); } protected long size(SegmentInfo info) throws IOException { - return info.sizeInBytes(); + return sizeBytes(info); } /**

Determines the largest segment (measured by total