Index: lucene/src/java/org/apache/lucene/index/CompoundFileWriter.java
--- lucene/src/java/org/apache/lucene/index/CompoundFileWriter.java Sun Jan 23 19:22:38 2011 -0500
+++ lucene/src/java/org/apache/lucene/index/CompoundFileWriter.java Mon Jan 24 08:52:36 2011 -0500
@@ -17,16 +17,16 @@
* limitations under the License.
*/
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.LinkedList;
+
+import org.apache.lucene.index.codecs.MergeState;
import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
-import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.IOUtils;
-import java.util.LinkedList;
-import java.util.HashSet;
-
-import java.io.IOException;
-
/**
* Combines multiple files into a single compound file.
* The file format:
@@ -76,7 +76,7 @@
private HashSet ids;
private LinkedList entries;
private boolean merged = false;
- private SegmentMerger.CheckAbort checkAbort;
+ private MergeState.CheckAbort checkAbort;
/** Create the compound stream in the specified file. The file name is the
* entire name (no extensions are added).
@@ -86,7 +86,7 @@
this(dir, name, null);
}
- CompoundFileWriter(Directory dir, String name, SegmentMerger.CheckAbort checkAbort) {
+ CompoundFileWriter(Directory dir, String name, MergeState.CheckAbort checkAbort) {
if (dir == null)
throw new NullPointerException("directory cannot be null");
if (name == null)
Index: lucene/src/java/org/apache/lucene/index/LogMergePolicy.java
--- lucene/src/java/org/apache/lucene/index/LogMergePolicy.java Sun Jan 23 19:22:38 2011 -0500
+++ lucene/src/java/org/apache/lucene/index/LogMergePolicy.java Mon Jan 24 08:52:36 2011 -0500
@@ -481,6 +481,7 @@
if (size < 1)
size = 1;
levels[i] = (float) Math.log(size)/norm;
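+      // log each segment's computed level to aid debugging of merge selection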
+ message("seg " + info.name + " level=" + levels[i]);
}
final float levelFloor;
Index: lucene/src/java/org/apache/lucene/index/MergePolicy.java
--- lucene/src/java/org/apache/lucene/index/MergePolicy.java Sun Jan 23 19:22:38 2011 -0500
+++ lucene/src/java/org/apache/lucene/index/MergePolicy.java Mon Jan 24 08:52:36 2011 -0500
@@ -110,7 +110,7 @@
return aborted;
}
- synchronized void checkAborted(Directory dir) throws MergeAbortedException {
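+    // public so that MergeState.CheckAbort (in the codecs package) can call it from outside this package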
+ public synchronized void checkAborted(Directory dir) throws MergeAbortedException {
if (aborted) {
throw new MergeAbortedException("merge is aborted: " + segString(dir));
}
Index: lucene/src/java/org/apache/lucene/index/SegmentMerger.java
--- lucene/src/java/org/apache/lucene/index/SegmentMerger.java Sun Jan 23 19:22:38 2011 -0500
+++ lucene/src/java/org/apache/lucene/index/SegmentMerger.java Mon Jan 24 08:52:36 2011 -0500
@@ -59,7 +59,7 @@
private int mergedDocs;
- private final CheckAbort checkAbort;
+ private final MergeState.CheckAbort checkAbort;
/** Maximum number of contiguous documents to bulk-copy
when merging stored fields */
@@ -78,9 +78,9 @@
this.fieldInfos = fieldInfos;
segment = name;
if (merge != null) {
- checkAbort = new CheckAbort(merge, directory);
+ checkAbort = new MergeState.CheckAbort(merge, directory);
} else {
- checkAbort = new CheckAbort(null, null) {
+ checkAbort = new MergeState.CheckAbort(null, null) {
@Override
public void work(double units) throws MergeAbortedException {
// do nothing
@@ -508,6 +508,7 @@
mergeState.hasPayloadProcessorProvider = payloadProcessorProvider != null;
mergeState.dirPayloadProcessor = new PayloadProcessorProvider.DirPayloadProcessor[mergeState.readerCount];
mergeState.currentPayloadProcessor = new PayloadProcessorProvider.PayloadProcessor[mergeState.readerCount];
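+    // make the abort checker visible to the codec (e.g. TermsConsumer) so term merging can honor an abort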
+ mergeState.checkAbort = checkAbort;
docBase = 0;
int inputDocBase = 0;
@@ -612,31 +613,4 @@
}
}
}
-
- static class CheckAbort {
- private double workCount;
- private MergePolicy.OneMerge merge;
- private Directory dir;
- public CheckAbort(MergePolicy.OneMerge merge, Directory dir) {
- this.merge = merge;
- this.dir = dir;
- }
-
- /**
- * Records the fact that roughly units amount of work
- * have been done since this method was last called.
- * When adding time-consuming code into SegmentMerger,
- * you should test different values for units to ensure
- * that the time in between calls to merge.checkAborted
- * is up to ~ 1 second.
- */
- public void work(double units) throws MergePolicy.MergeAbortedException {
- workCount += units;
- if (workCount >= 10000.0) {
- merge.checkAborted(dir);
- workCount = 0;
- }
- }
- }
-
}
Index: lucene/src/java/org/apache/lucene/index/codecs/MergeState.java
--- lucene/src/java/org/apache/lucene/index/codecs/MergeState.java Sun Jan 23 19:22:38 2011 -0500
+++ lucene/src/java/org/apache/lucene/index/codecs/MergeState.java Mon Jan 24 08:52:36 2011 -0500
@@ -17,13 +17,16 @@
* limitations under the License.
*/
+import java.util.List;
+
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.PayloadProcessorProvider.DirPayloadProcessor;
import org.apache.lucene.index.PayloadProcessorProvider.PayloadProcessor;
+import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
-import java.util.List;
/** Holds common state used during segment merging
*
@@ -37,6 +40,7 @@
public int[] docBase; // New docID base per reader
public int mergedDocCount; // Total # merged docs
public Bits multiDeletedDocs;
+  public CheckAbort checkAbort;    // Lets merge code check periodically whether the merge has been aborted
// Updated per field;
public FieldInfo fieldInfo;
@@ -45,5 +49,30 @@
public boolean hasPayloadProcessorProvider;
public DirPayloadProcessor[] dirPayloadProcessor;
public PayloadProcessor[] currentPayloadProcessor;
-
+
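+  /**
+   * Class for recording units of work when merging segments;
+   * lets long-running merge steps periodically check whether
+   * the merge has been aborted.
+   */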
+ public static class CheckAbort {
+ private double workCount;
+ private MergePolicy.OneMerge merge;
+ private Directory dir;
+ public CheckAbort(MergePolicy.OneMerge merge, Directory dir) {
+ this.merge = merge;
+ this.dir = dir;
+ }
+
+ /**
+   * Records that roughly {@code units} units of work have been done
+   * since this method was last called. When adding time-consuming
+   * code into SegmentMerger or codec merging code, you should test
+   * different values for {@code units} to ensure that the time
+   * between calls to merge.checkAborted is up to ~1 second.
+ */
+ public void work(double units) throws MergePolicy.MergeAbortedException {
+ workCount += units;
+ if (workCount >= 10000.0) {
+ merge.checkAborted(dir);
+ workCount = 0;
+ }
+ }
+ }
}
Index: lucene/src/java/org/apache/lucene/index/codecs/TermsConsumer.java
--- lucene/src/java/org/apache/lucene/index/codecs/TermsConsumer.java Sun Jan 23 19:22:38 2011 -0500
+++ lucene/src/java/org/apache/lucene/index/codecs/TermsConsumer.java Mon Jan 24 08:52:36 2011 -0500
@@ -56,6 +56,7 @@
BytesRef term;
assert termsEnum != null;
long sumTotalTermFreq = 0;
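+    // docFreq accumulated across merged terms since the last abort check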
+ long sumDF = 0;
if (mergeState.fieldInfo.omitTermFreqAndPositions) {
if (docsEnum == null) {
@@ -73,6 +74,11 @@
final TermStats stats = postingsConsumer.merge(mergeState, docsEnum);
if (stats.docFreq > 0) {
finishTerm(term, stats);
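+          // once enough docFreq has accumulated, report it as work to checkAbort so an aborted merge stops promptly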
+ sumDF += stats.docFreq;
+ if (sumDF > 60000) {
+ mergeState.checkAbort.work(sumDF/5.0);
+ sumDF = 0;
+ }
}
}
}
@@ -99,6 +105,11 @@
if (stats.docFreq > 0) {
finishTerm(term, stats);
sumTotalTermFreq += stats.totalTermFreq;
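+          // same periodic abort check as in the omitTermFreqAndPositions branch above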
+ sumDF += stats.docFreq;
+ if (sumDF > 60000) {
+ mergeState.checkAbort.work(sumDF/5.0);
+ sumDF = 0;
+ }
}
}
}