Index: lucene/src/java/org/apache/lucene/index/CompoundFileWriter.java
--- lucene/src/java/org/apache/lucene/index/CompoundFileWriter.java	Sun Jan 23 19:22:38 2011 -0500
+++ lucene/src/java/org/apache/lucene/index/CompoundFileWriter.java	Mon Jan 24 08:52:36 2011 -0500
@@ -17,16 +17,16 @@
  * limitations under the License.
  */
 
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.LinkedList;
+
+import org.apache.lucene.index.codecs.MergeState;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.IndexOutput;
-import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.IOUtils;
 
-import java.util.LinkedList;
-import java.util.HashSet;
-
-import java.io.IOException;
-
 /**
  * Combines multiple files into a single compound file.
  * The file format:
@@ -76,7 +76,7 @@
   private HashSet ids;
   private LinkedList entries;
   private boolean merged = false;
-  private SegmentMerger.CheckAbort checkAbort;
+  private MergeState.CheckAbort checkAbort;
 
   /** Create the compound stream in the specified file. The file name is the
    *  entire name (no extensions are added).
@@ -86,7 +86,7 @@
     this(dir, name, null);
   }
 
-  CompoundFileWriter(Directory dir, String name, SegmentMerger.CheckAbort checkAbort) {
+  CompoundFileWriter(Directory dir, String name, MergeState.CheckAbort checkAbort) {
     if (dir == null)
       throw new NullPointerException("directory cannot be null");
     if (name == null)
Index: lucene/src/java/org/apache/lucene/index/LogMergePolicy.java
--- lucene/src/java/org/apache/lucene/index/LogMergePolicy.java	Sun Jan 23 19:22:38 2011 -0500
+++ lucene/src/java/org/apache/lucene/index/LogMergePolicy.java	Mon Jan 24 08:52:36 2011 -0500
@@ -481,6 +481,7 @@
       if (size < 1)
         size = 1;
       levels[i] = (float) Math.log(size)/norm;
+      message("seg " + info.name + " level=" + levels[i]);
     }
 
     final float levelFloor;
Index: lucene/src/java/org/apache/lucene/index/MergePolicy.java
--- lucene/src/java/org/apache/lucene/index/MergePolicy.java	Sun Jan 23 19:22:38 2011 -0500
+++ lucene/src/java/org/apache/lucene/index/MergePolicy.java	Mon Jan 24 08:52:36 2011 -0500
@@ -110,7 +110,7 @@
       return aborted;
     }
 
-    synchronized void checkAborted(Directory dir) throws MergeAbortedException {
+    public synchronized void checkAborted(Directory dir) throws MergeAbortedException {
       if (aborted) {
         throw new MergeAbortedException("merge is aborted: " + segString(dir));
       }
Index: lucene/src/java/org/apache/lucene/index/SegmentMerger.java
--- lucene/src/java/org/apache/lucene/index/SegmentMerger.java	Sun Jan 23 19:22:38 2011 -0500
+++ lucene/src/java/org/apache/lucene/index/SegmentMerger.java	Mon Jan 24 08:52:36 2011 -0500
@@ -59,7 +59,7 @@
 
   private int mergedDocs;
 
-  private final CheckAbort checkAbort;
+  private final MergeState.CheckAbort checkAbort;
 
   /** Maximum number of contiguous documents to bulk-copy
       when merging stored fields */
@@ -78,9 +78,9 @@
     this.fieldInfos = fieldInfos;
     segment = name;
     if (merge != null) {
-      checkAbort = new CheckAbort(merge, directory);
+      checkAbort = new MergeState.CheckAbort(merge, directory);
     } else {
-      checkAbort = new CheckAbort(null, null) {
+      checkAbort = new MergeState.CheckAbort(null, null) {
         @Override
         public void work(double units) throws MergeAbortedException {
           // do nothing
@@ -508,6 +508,7 @@
     mergeState.hasPayloadProcessorProvider = payloadProcessorProvider != null;
     mergeState.dirPayloadProcessor = new PayloadProcessorProvider.DirPayloadProcessor[mergeState.readerCount];
     mergeState.currentPayloadProcessor = new PayloadProcessorProvider.PayloadProcessor[mergeState.readerCount];
+    mergeState.checkAbort = checkAbort;
 
     docBase = 0;
     int inputDocBase = 0;
@@ -612,31 +613,4 @@
       }
     }
   }
-
-  static class CheckAbort {
-    private double workCount;
-    private MergePolicy.OneMerge merge;
-    private Directory dir;
-    public CheckAbort(MergePolicy.OneMerge merge, Directory dir) {
-      this.merge = merge;
-      this.dir = dir;
-    }
-
-    /**
-     * Records the fact that roughly units amount of work
-     * have been done since this method was last called.
-     * When adding time-consuming code into SegmentMerger,
-     * you should test different values for units to ensure
-     * that the time in between calls to merge.checkAborted
-     * is up to ~ 1 second.
-     */
-    public void work(double units) throws MergePolicy.MergeAbortedException {
-      workCount += units;
-      if (workCount >= 10000.0) {
-        merge.checkAborted(dir);
-        workCount = 0;
-      }
-    }
-  }
-
 }
Index: lucene/src/java/org/apache/lucene/index/codecs/MergeState.java
--- lucene/src/java/org/apache/lucene/index/codecs/MergeState.java	Sun Jan 23 19:22:38 2011 -0500
+++ lucene/src/java/org/apache/lucene/index/codecs/MergeState.java	Mon Jan 24 08:52:36 2011 -0500
@@ -17,13 +17,16 @@
  * limitations under the License.
  */
 
+import java.util.List;
+
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.MergePolicy;
 import org.apache.lucene.index.PayloadProcessorProvider.DirPayloadProcessor;
 import org.apache.lucene.index.PayloadProcessorProvider.PayloadProcessor;
+import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.Bits;
-import java.util.List;
 
 /** Holds common state used during segment merging
  *
@@ -37,6 +40,7 @@
   public int[] docBase;               // New docID base per reader
   public int mergedDocCount;          // Total # merged docs
   public Bits multiDeletedDocs;
+  public CheckAbort checkAbort;
 
   // Updated per field;
   public FieldInfo fieldInfo;
@@ -45,5 +49,30 @@
   public boolean hasPayloadProcessorProvider;
   public DirPayloadProcessor[] dirPayloadProcessor;
   public PayloadProcessor[] currentPayloadProcessor;
-
+
+  public static class CheckAbort {
+    private double workCount;
+    private MergePolicy.OneMerge merge;
+    private Directory dir;
+    public CheckAbort(MergePolicy.OneMerge merge, Directory dir) {
+      this.merge = merge;
+      this.dir = dir;
+    }
+
+    /**
+     * Records the fact that roughly units amount of work
+     * have been done since this method was last called.
+     * When adding time-consuming code into SegmentMerger,
+     * you should test different values for units to ensure
+     * that the time in between calls to merge.checkAborted
+     * is up to ~ 1 second.
+     */
+    public void work(double units) throws MergePolicy.MergeAbortedException {
+      workCount += units;
+      if (workCount >= 10000.0) {
+        merge.checkAborted(dir);
+        workCount = 0;
+      }
+    }
+  }
 }
Index: lucene/src/java/org/apache/lucene/index/codecs/TermsConsumer.java
--- lucene/src/java/org/apache/lucene/index/codecs/TermsConsumer.java	Sun Jan 23 19:22:38 2011 -0500
+++ lucene/src/java/org/apache/lucene/index/codecs/TermsConsumer.java	Mon Jan 24 08:52:36 2011 -0500
@@ -56,6 +56,7 @@
     BytesRef term;
     assert termsEnum != null;
     long sumTotalTermFreq = 0;
+    long sumDF = 0;
 
     if (mergeState.fieldInfo.omitTermFreqAndPositions) {
       if (docsEnum == null) {
@@ -73,6 +74,11 @@
         final TermStats stats = postingsConsumer.merge(mergeState, docsEnum);
         if (stats.docFreq > 0) {
           finishTerm(term, stats);
+          sumDF += stats.docFreq;
+          if (sumDF > 60000) {
+            mergeState.checkAbort.work(sumDF/5.0);
+            sumDF = 0;
+          }
         }
       }
     }
@@ -99,6 +105,11 @@
         if (stats.docFreq > 0) {
           finishTerm(term, stats);
           sumTotalTermFreq += stats.totalTermFreq;
+          sumDF += stats.docFreq;
+          if (sumDF > 60000) {
+            mergeState.checkAbort.work(sumDF/5.0);
+            sumDF = 0;
+          }
         }
       }
     }
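
Note (not part of the patch): the sketch below only illustrates the abort-checking pattern the patch wires into the codec layer. TermsConsumer.merge() now sums docFreq across merged terms and, once more than 60000 docs have accumulated, reports that as work units to MergeState.CheckAbort, which calls MergePolicy.OneMerge.checkAborted() roughly once per 10000 units so a cancelled merge stops within about a second, per the CheckAbort javadoc. The classes, exception, and mergeTerms() driver in the sketch are simplified stand-ins modeled on the patch, not Lucene APIs.

// --- Illustrative sketch (not Lucene code) ---------------------------------
// Self-contained mock-up of the accumulate-then-check pattern used above.
import java.util.Arrays;
import java.util.List;

public class CheckAbortSketch {

  /** Stand-in for MergePolicy.MergeAbortedException. */
  static class MergeAborted extends Exception {
    MergeAborted(String msg) { super(msg); }
  }

  /** Stand-in for MergeState.CheckAbort: batches work units so the abort flag
   *  is only consulted roughly once per 10000 units of reported work. */
  static class CheckAbort {
    private double workCount;
    private volatile boolean aborted;

    void setAborted() { aborted = true; }

    void work(double units) throws MergeAborted {
      workCount += units;
      if (workCount >= 10000.0) {
        if (aborted) {
          throw new MergeAborted("merge is aborted");
        }
        workCount = 0;
      }
    }
  }

  /** Mirrors the TermsConsumer change: sum docFreq across merged terms and only
   *  report work (1 unit per 5 docs) once more than 60000 docs have been seen. */
  static void mergeTerms(List<Integer> perTermDocFreqs, CheckAbort checkAbort) throws MergeAborted {
    long sumDF = 0;
    for (int docFreq : perTermDocFreqs) {
      // ... merge the postings for this term here ...
      sumDF += docFreq;
      if (sumDF > 60000) {
        checkAbort.work(sumDF / 5.0);
        sumDF = 0;
      }
    }
  }

  public static void main(String[] args) throws Exception {
    CheckAbort checkAbort = new CheckAbort();
    mergeTerms(Arrays.asList(40000, 30000, 25000), checkAbort);   // runs to completion
    checkAbort.setAborted();
    try {
      mergeTerms(Arrays.asList(40000, 30000, 25000), checkAbort); // now stops mid-merge
    } catch (MergeAborted e) {
      System.out.println("stopped early: " + e.getMessage());
    }
  }
}

Batching the reports (only when sumDF exceeds 60000, and scaled down by 5) keeps the per-term overhead negligible while still keeping the interval between abort checks near the ~1 second target described in the CheckAbort javadoc.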