Index: lucene/contrib/misc/src/java/org/apache/lucene/index/BalancedSegmentMergePolicy.java --- lucene/contrib/misc/src/java/org/apache/lucene/index/BalancedSegmentMergePolicy.java Wed Jun 15 14:40:24 2011 -0400 +++ lucene/contrib/misc/src/java/org/apache/lucene/index/BalancedSegmentMergePolicy.java Wed Jun 15 16:05:30 2011 -0400 @@ -20,7 +20,7 @@ import java.io.IOException; import java.util.Collections; -import java.util.Set; +import java.util.Map; /** * Merge policy that tries to balance not doing large @@ -105,7 +105,7 @@ } @Override - public MergeSpecification findMergesForOptimize(SegmentInfos infos, int maxNumSegments, Set segmentsToOptimize) throws IOException { + public MergeSpecification findMergesForOptimize(SegmentInfos infos, int maxNumSegments, Map segmentsToOptimize) throws IOException { assert maxNumSegments > 0; @@ -120,8 +120,7 @@ while(last > 0) { final SegmentInfo info = infos.info(--last); - if (segmentsToOptimize.contains(info)) { - + if (segmentsToOptimize.containsKey(info)) { last++; break; } Index: lucene/src/java/org/apache/lucene/index/IndexWriter.java --- lucene/src/java/org/apache/lucene/index/IndexWriter.java Wed Jun 15 14:40:24 2011 -0400 +++ lucene/src/java/org/apache/lucene/index/IndexWriter.java Wed Jun 15 16:05:30 2011 -0400 @@ -236,7 +236,7 @@ private DocumentsWriter docWriter; final IndexFileDeleter deleter; - private Set segmentsToOptimize = new HashSet(); // used by optimize to note those needing optimization + private Map segmentsToOptimize = new HashMap(); // used by optimize to note those needing optimization private int optimizeMaxNumSegments; private Lock writeLock; @@ -1664,7 +1664,9 @@ synchronized(this) { resetMergeExceptions(); segmentsToOptimize.clear(); - segmentsToOptimize.addAll(segmentInfos.asSet()); + for(SegmentInfo info : segmentInfos) { + segmentsToOptimize.put(info, Boolean.TRUE); + } optimizeMaxNumSegments = maxNumSegments; // Now mark all pending & running merges as optimize @@ -1888,7 +1890,7 @@ final MergePolicy.MergeSpecification spec; if (optimize) { - spec = mergePolicy.findMergesForOptimize(segmentInfos, maxNumSegmentsOptimize, Collections.unmodifiableSet(segmentsToOptimize)); + spec = mergePolicy.findMergesForOptimize(segmentInfos, maxNumSegmentsOptimize, Collections.unmodifiableMap(segmentsToOptimize)); if (spec != null) { final int numMerges = spec.merges.size(); @@ -3042,7 +3044,7 @@ if (merge.optimize) { // cascade the optimize: - segmentsToOptimize.add(merge.info); + segmentsToOptimize.put(merge.info, Boolean.FALSE); } return true; @@ -3086,7 +3088,7 @@ * * @lucene.experimental */ - public final void merge(MergePolicy.OneMerge merge) + public void merge(MergePolicy.OneMerge merge) throws CorruptIndexException, IOException { boolean success = false; @@ -3167,7 +3169,7 @@ if (info.dir != directory) { isExternal = true; } - if (segmentsToOptimize.contains(info)) { + if (segmentsToOptimize.containsKey(info)) { merge.optimize = true; merge.maxNumSegmentsOptimize = optimizeMaxNumSegments; } Index: lucene/src/java/org/apache/lucene/index/LogMergePolicy.java --- lucene/src/java/org/apache/lucene/index/LogMergePolicy.java Wed Jun 15 14:40:24 2011 -0400 +++ lucene/src/java/org/apache/lucene/index/LogMergePolicy.java Wed Jun 15 16:05:30 2011 -0400 @@ -21,7 +21,7 @@ import java.util.ArrayList; import java.util.Collection; import java.util.List; -import java.util.Set; +import java.util.Map; /**

This class implements a {@link MergePolicy} that tries * to merge segments into levels of exponentially @@ -201,20 +201,23 @@ } } - protected boolean isOptimized(SegmentInfos infos, int maxNumSegments, Set segmentsToOptimize) throws IOException { + protected boolean isOptimized(SegmentInfos infos, int maxNumSegments, Map segmentsToOptimize) throws IOException { final int numSegments = infos.size(); int numToOptimize = 0; SegmentInfo optimizeInfo = null; + boolean segmentIsOriginal = false; for(int i=0;i segmentsToOptimize) throws IOException { + int maxNumSegments, Map segmentsToOptimize) throws IOException { assert maxNumSegments > 0; if (verbose()) { @@ -368,7 +371,7 @@ int last = infos.size(); while (last > 0) { final SegmentInfo info = infos.info(--last); - if (segmentsToOptimize.contains(info)) { + if (segmentsToOptimize.get(info) != null) { last++; break; } Index: lucene/src/java/org/apache/lucene/index/MergePolicy.java --- lucene/src/java/org/apache/lucene/index/MergePolicy.java Wed Jun 15 14:40:24 2011 -0400 +++ lucene/src/java/org/apache/lucene/index/MergePolicy.java Wed Jun 15 16:05:30 2011 -0400 @@ -24,7 +24,7 @@ import java.io.IOException; import java.util.List; import java.util.ArrayList; -import java.util.Set; +import java.util.Map; /** *

Expert: a MergePolicy determines the sequence of @@ -297,10 +297,15 @@ * is always 1) * @param segmentsToOptimize * contains the specific SegmentInfo instances that must be merged - * away. This may be a subset of all SegmentInfos. + * away. This may be a subset of all + * SegmentInfos. If the value is True for a + * given SegmentInfo, that means this segment was + * an original segment present in the + * to-be-optimized index; else, it was a segment + * produced by a cascaded merge. */ public abstract MergeSpecification findMergesForOptimize( - SegmentInfos segmentInfos, int maxSegmentCount, Set segmentsToOptimize) + SegmentInfos segmentInfos, int maxSegmentCount, Map segmentsToOptimize) throws CorruptIndexException, IOException; /** Index: lucene/src/java/org/apache/lucene/index/NoMergePolicy.java --- lucene/src/java/org/apache/lucene/index/NoMergePolicy.java Wed Jun 15 14:40:24 2011 -0400 +++ lucene/src/java/org/apache/lucene/index/NoMergePolicy.java Wed Jun 15 16:05:30 2011 -0400 @@ -18,7 +18,7 @@ */ import java.io.IOException; -import java.util.Set; +import java.util.Map; /** * A {@link MergePolicy} which never returns merges to execute (hence it's @@ -59,7 +59,7 @@ @Override public MergeSpecification findMergesForOptimize(SegmentInfos segmentInfos, - int maxSegmentCount, Set segmentsToOptimize) + int maxSegmentCount, Map segmentsToOptimize) throws CorruptIndexException, IOException { return null; } @Override Index: lucene/src/java/org/apache/lucene/index/TieredMergePolicy.java --- lucene/src/java/org/apache/lucene/index/TieredMergePolicy.java Wed Jun 15 14:40:24 2011 -0400 +++ lucene/src/java/org/apache/lucene/index/TieredMergePolicy.java Wed Jun 15 16:05:30 2011 -0400 @@ -18,7 +18,7 @@ */ import java.io.IOException; -import java.util.Set; +import java.util.Map; import java.util.Collection; import java.util.Collections; import java.util.HashSet; @@ -454,7 +454,7 @@ } @Override - public MergeSpecification findMergesForOptimize(SegmentInfos infos, int maxSegmentCount, Set segmentsToOptimize) throws IOException { + public MergeSpecification findMergesForOptimize(SegmentInfos infos, int maxSegmentCount, Map segmentsToOptimize) throws IOException { if (verbose()) { message("findMergesForOptimize maxSegmentCount=" + maxSegmentCount + " infos=" + writer.get().segString(infos) + " segmentsToOptimize=" + segmentsToOptimize); } @@ -462,8 +462,11 @@ List eligible = new ArrayList(); boolean optimizeMergeRunning = false; final Collection merging = writer.get().getMergingSegments(); + boolean segmentIsOriginal = false; for(SegmentInfo info : infos) { - if (segmentsToOptimize.contains(info)) { + final Boolean isOriginal = segmentsToOptimize.get(info); + if (isOriginal != null) { + segmentIsOriginal = isOriginal; if (!merging.contains(info)) { eligible.add(info); } else { @@ -477,7 +480,7 @@ } if ((maxSegmentCount > 1 && eligible.size() <= maxSegmentCount) || - (maxSegmentCount == 1 && eligible.size() == 1 && isOptimized(eligible.get(0)))) { + (maxSegmentCount == 1 && eligible.size() == 1 && (!segmentIsOriginal || isOptimized(eligible.get(0))))) { if (verbose()) { message("already optimized"); } Index: lucene/src/java/org/apache/lucene/index/UpgradeIndexMergePolicy.java --- lucene/src/java/org/apache/lucene/index/UpgradeIndexMergePolicy.java Wed Jun 15 14:40:24 2011 -0400 +++ lucene/src/java/org/apache/lucene/index/UpgradeIndexMergePolicy.java Wed Jun 15 16:05:30 2011 -0400 @@ -21,9 +21,9 @@ import java.io.IOException; import java.util.ArrayList; -import java.util.HashSet; import java.util.List; -import java.util.Set; +import java.util.Map; +import java.util.HashMap; /** This {@link MergePolicy} is used for upgrading all existing segments of * an index when calling {@link IndexWriter#optimize()}. @@ -79,12 +79,13 @@ } @Override - public MergeSpecification findMergesForOptimize(SegmentInfos segmentInfos, int maxSegmentCount, Set segmentsToOptimize) throws CorruptIndexException, IOException { + public MergeSpecification findMergesForOptimize(SegmentInfos segmentInfos, int maxSegmentCount, Map segmentsToOptimize) throws CorruptIndexException, IOException { // first find all old segments - final HashSet oldSegments = new HashSet(); + final Map oldSegments = new HashMap(); for (final SegmentInfo si : segmentInfos) { - if (segmentsToOptimize.contains(si) && shouldUpgradeSegment(si)) { - oldSegments.add(si); + final Boolean v =segmentsToOptimize.get(si); + if (v != null && shouldUpgradeSegment(si)) { + oldSegments.put(si, v); } } @@ -93,14 +94,16 @@ if (oldSegments.isEmpty()) return null; - MergeSpecification spec = base.findMergesForOptimize(segmentInfos, maxSegmentCount, oldSegments); + MergeSpecification spec = base.findMergesForOptimize(segmentInfos, maxSegmentCount, oldSegments); if (spec != null) { // remove all segments that are in merge specification from oldSegments, // the resulting set contains all segments that are left over // and will be merged to one additional segment: for (final OneMerge om : spec.merges) { - oldSegments.removeAll(om.segments); + for(SegmentInfo info : om.segments) { + oldSegments.remove(info); + } } } @@ -110,7 +113,7 @@ " does not want to merge all old segments, merge remaining ones into new segment: " + oldSegments); final List newInfos = new ArrayList(); for (final SegmentInfo si : segmentInfos) { - if (oldSegments.contains(si)) { + if (oldSegments.containsKey(si)) { newInfos.add(si); } } Index: lucene/src/test-framework/org/apache/lucene/index/MockRandomMergePolicy.java --- lucene/src/test-framework/org/apache/lucene/index/MockRandomMergePolicy.java Wed Jun 15 14:40:24 2011 -0400 +++ lucene/src/test-framework/org/apache/lucene/index/MockRandomMergePolicy.java Wed Jun 15 16:05:30 2011 -0400 @@ -22,7 +22,7 @@ import java.util.Collections; import java.util.List; import java.util.Random; -import java.util.Set; +import java.util.Map; import org.apache.lucene.util._TestUtil; @@ -56,12 +56,12 @@ @Override public MergeSpecification findMergesForOptimize( - SegmentInfos segmentInfos, int maxSegmentCount, Set segmentsToOptimize) + SegmentInfos segmentInfos, int maxSegmentCount, Map segmentsToOptimize) throws CorruptIndexException, IOException { final List eligibleSegments = new ArrayList(); for(SegmentInfo info : segmentInfos) { - if (segmentsToOptimize.contains(info)) { + if (segmentsToOptimize.containsKey(info)) { eligibleSegments.add(info); } } @@ -85,7 +85,7 @@ if (mergeSpec != null) { for(OneMerge merge : mergeSpec.merges) { for(SegmentInfo info : merge.segments) { - assert segmentsToOptimize.contains(info); + assert segmentsToOptimize.containsKey(info); } } } Index: lucene/src/test/org/apache/lucene/index/TestOptimizeForever.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ lucene/src/test/org/apache/lucene/index/TestOptimizeForever.java Wed Jun 15 16:05:30 2011 -0400 @@ -0,0 +1,106 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LineFileDocs; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; + + +public class TestOptimizeForever extends LuceneTestCase { + + // Just counts how many merges are done for optimize + private static class MyIndexWriter extends IndexWriter { + + AtomicInteger optimizeMergeCount = new AtomicInteger(); + private boolean first; + + public MyIndexWriter(Directory dir, IndexWriterConfig conf) throws Exception { + super(dir, conf); + } + + @Override + public void merge(MergePolicy.OneMerge merge) throws CorruptIndexException, IOException { + if (merge.optimize && (first || merge.segments.size() == 1)) { + first = false; + if (VERBOSE) { + System.out.println("TEST: optimized merge"); + } + optimizeMergeCount.incrementAndGet(); + } + super.merge(merge); + } + } + + public void test() throws Exception { + final Directory d = newDirectory(); + final MyIndexWriter w = new MyIndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); + w.setInfoStream(VERBOSE ? System.out : null); + + // Try to make an index that requires optimizing: + w.getConfig().setMaxBufferedDocs(_TestUtil.nextInt(random, 2, 11)); + final int numStartDocs = atLeast(20); + final LineFileDocs docs = new LineFileDocs(random); + for(int docIDX=0;docIDX segmentsToOptimize) + int maxSegmentCount, Map segmentsToOptimize) throws CorruptIndexException, IOException { return null; }