Index: lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java
===================================================================
--- lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java (revision 1099031)
+++ lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java (working copy)
@@ -32,6 +32,7 @@
 import org.apache.lucene.index.NoMergePolicy;
 import org.apache.lucene.index.NoMergeScheduler;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
+import org.apache.lucene.index.TieredMergePolicy;
 import org.apache.lucene.store.LockObtainFailedException;
 import org.apache.lucene.util.Version;
@@ -145,6 +146,9 @@
       LogMergePolicy logMergePolicy = (LogMergePolicy) iwConf.getMergePolicy();
       logMergePolicy.setUseCompoundFile(isCompound);
       logMergePolicy.setMergeFactor(config.get("merge.factor",OpenIndexTask.DEFAULT_MERGE_PFACTOR));
+    } else if(iwConf.getMergePolicy() instanceof TieredMergePolicy) {
+      TieredMergePolicy tieredMergePolicy = (TieredMergePolicy) iwConf.getMergePolicy();
+      tieredMergePolicy.setUseCompoundFile(isCompound);
     }
   }
   final double ramBuffer = config.get("ram.flush.mb",OpenIndexTask.DEFAULT_RAM_FLUSH_MB);
Index: lucene/contrib/remote/src/test/org/apache/lucene/search/TestRemoteSort.java
===================================================================
--- lucene/contrib/remote/src/test/org/apache/lucene/search/TestRemoteSort.java (revision 1099031)
+++ lucene/contrib/remote/src/test/org/apache/lucene/search/TestRemoteSort.java (working copy)
@@ -31,6 +31,7 @@
 import org.apache.lucene.index.LogMergePolicy;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.LuceneTestCase;
 import org.junit.AfterClass;
 import org.junit.Before;
 import org.junit.BeforeClass;
@@ -84,7 +85,7 @@
     IndexWriter writer = new IndexWriter(indexStore, newIndexWriterConfig(
         TEST_VERSION_CURRENT, new SimpleAnalyzer(TEST_VERSION_CURRENT))
         .setMaxBufferedDocs(2));
-    ((LogMergePolicy) writer.getConfig().getMergePolicy()).setMergeFactor(1000);
+    setMergeFactor(writer.getConfig().getMergePolicy(), 1000);
     for (int i=0; i
Index: lucene/src/java/org/apache/lucene/index/TieredMergePolicy.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/TieredMergePolicy.java (revision 0)
+++ lucene/src/java/org/apache/lucene/index/TieredMergePolicy.java (working copy)
+/** Merges segments of approximately equal size, subject to
+ *  an allowed number of segments per tier.
+ *
+ *  For normal merging, this policy first computes a
+ *  "budget" of how many segments are allowed to be in the
+ *  index.  If the index is over-budget, then the policy
+ *  sorts segments by decreasing size (pro-rating by percent
+ *  deletes), and then finds the least-cost merge.  Merge
+ *  cost is measured by a combination of the "skew" of the
+ *  merge (size of largest seg divided by smallest seg),
+ *  total merge size and pct deletes reclaimed, so that
+ *  merges with lower skew, smaller size and those
+ *  reclaiming more deletes are favored.
+ *
+ *  If a merge will produce a segment that's larger than
+ *  {@link #setMaxMergedSegmentMB}, then the policy will
+ *  merge fewer segments (down to 1 at once, if that one has
+ *  deletions) to keep the segment size under budget.
+ *
+ *  NOTE: this policy freely merges non-adjacent
+ *  segments; if this is a problem, use {@link
+ *  LogMergePolicy}.
+ *
+ *  NOTE: This policy always merges by byte size
+ *  of the segments, always pro-rates by percent deletes,
+ *  and does not apply any maximum segment size during
+ *  optimize (unlike {@link LogByteSizeMergePolicy}).
+ *
+ *  @lucene.experimental
+ */
+
+// TODO
+//   - we could try to take into account whether a large
+//     merge is already running (under CMS) and then bias
+//     ourselves towards picking smaller merges if so (or,
+//     maybe CMS should do so)
+
+public class TieredMergePolicy extends MergePolicy {
+
+  private int maxMergeAtOnce = 10;
+  private long maxMergedSegmentBytes = 5*1024*1024*1024L;
+  private int maxMergeAtOnceExplicit = 30;
+
+  private long floorSegmentBytes = 2*1024*1024L;
+  private double segsPerTier = 10.0;
+  private double expungeDeletesPctAllowed = 10.0;
+  private boolean useCompoundFile = true;
+  private double noCFSRatio = 0.1;
+
+  /** Maximum number of segments to be merged at a time
+   *  during "normal" merging.  For explicit merging (eg,
+   *  optimize or expungeDeletes was called), see {@link
+   *  #setMaxMergeAtOnceExplicit}.  Default is 10. */
+  public TieredMergePolicy setMaxMergeAtOnce(int v) {
+    if (v < 2) {
+      throw new IllegalArgumentException("maxMergeAtOnce must be > 1 (got " + v + ")");
+    }
+    maxMergeAtOnce = v;
+    return this;
+  }
+
+  /** @see #setMaxMergeAtOnce */
+  public int getMaxMergeAtOnce() {
+    return maxMergeAtOnce;
+  }
+
+  // TODO: should addIndexes do explicit merging, too?  And,
+  // if user calls IW.maybeMerge "explicitly"
+
+  /** Maximum number of segments to be merged at a time
+   *  during optimize or expungeDeletes.  Default is 30. */
+  public TieredMergePolicy setMaxMergeAtOnceExplicit(int v) {
+    if (v < 2) {
+      throw new IllegalArgumentException("maxMergeAtOnceExplicit must be > 1 (got " + v + ")");
+    }
+    maxMergeAtOnceExplicit = v;
+    return this;
+  }
+
+  /** @see #setMaxMergeAtOnceExplicit */
+  public int getMaxMergeAtOnceExplicit() {
+    return maxMergeAtOnceExplicit;
+  }
+
+  /** Maximum sized segment to produce during
+   *  normal merging.  This setting is approximate: the
+   *  estimate of the merged segment size is made by summing
+   *  sizes of to-be-merged segments (compensating for
+   *  percent deleted docs).  Default is 5 GB. */
+  public TieredMergePolicy setMaxMergedSegmentMB(double v) {
+    maxMergedSegmentBytes = (long) (v*1024*1024);
+    return this;
+  }
+
+  /** @see #setMaxMergedSegmentMB */
+  public double getMaxMergedSegmentMB() {
+    return maxMergedSegmentBytes/1024/1024.;
+  }
+
+  /** Segments smaller than this are "rounded up" to this
+   *  size, ie treated as equal (floor) size for merge
+   *  selection.  This is to prevent frequent flushing of
+   *  tiny segments from allowing a long tail in the index.
+   *  Default is 2 MB. */
+  public TieredMergePolicy setFloorSegmentMB(double v) {
+    if (v <= 0.0) {
+      throw new IllegalArgumentException("floorSegmentMB must be > 0.0 (got " + v + ")");
+    }
+    floorSegmentBytes = (long) (v*1024*1024);
+    return this;
+  }
+
+  /** @see #setFloorSegmentMB */
+  public double getFloorSegmentMB() {
+    return floorSegmentBytes/1024/1024.;
+  }
+
+  /** When expungeDeletes is called, we only merge away a
+   *  segment if its delete percentage is over this
+   *  threshold.  Default is 10%. */
+  public TieredMergePolicy setExpungeDeletesPctAllowed(double v) {
+    if (v < 0.0 || v > 100.0) {
+      throw new IllegalArgumentException("expungeDeletesPctAllowed must be between 0.0 and 100.0 inclusive (got " + v + ")");
+    }
+    expungeDeletesPctAllowed = v;
+    return this;
+  }
+
+  /** @see #setExpungeDeletesPctAllowed */
+  public double getExpungeDeletesPctAllowed() {
+    return expungeDeletesPctAllowed;
+  }
+
+  /** Sets the allowed number of segments per tier.  Smaller
+   *  values mean more merging but fewer segments.  This
+   *  value should be >= the {@link #setMaxMergeAtOnce}
+   *  setting, otherwise you'll hit excessive merging.
+   *  Default is 10.0. */
+  public TieredMergePolicy setSegmentsPerTier(double v) {
+    if (v < 2.0) {
+      throw new IllegalArgumentException("segmentsPerTier must be >= 2.0 (got " + v + ")");
+    }
+    segsPerTier = v;
+    return this;
+  }
+
+  /** @see #setSegmentsPerTier */
+  public double getSegmentsPerTier() {
+    return segsPerTier;
+  }
+
+  /** Sets whether compound file format should be used for
+   *  newly flushed and newly merged segments.  Default
+   *  true. */
+  public TieredMergePolicy setUseCompoundFile(boolean useCompoundFile) {
+    this.useCompoundFile = useCompoundFile;
+    return this;
+  }
+
+  /** @see #setUseCompoundFile */
+  public boolean getUseCompoundFile() {
+    return useCompoundFile;
+  }
+
+  /** If a merged segment will be more than this percentage
+   *  of the total size of the index, leave the segment as
+   *  non-compound file even if compound file is enabled.
+   *  Set to 1.0 to always use CFS regardless of merge
+   *  size.  Default is 0.1. */
+  public TieredMergePolicy setNoCFSRatio(double noCFSRatio) {
+    if (noCFSRatio < 0.0 || noCFSRatio > 1.0) {
+      throw new IllegalArgumentException("noCFSRatio must be 0.0 to 1.0 inclusive; got " + noCFSRatio);
+    }
+    this.noCFSRatio = noCFSRatio;
+    return this;
+  }
+
+  /** @see #setNoCFSRatio */
+  public double getNoCFSRatio() {
+    return noCFSRatio;
+  }
+
+  private class SegmentByteSizeDescending implements Comparator<SegmentInfo> {
+    public int compare(SegmentInfo o1, SegmentInfo o2) {
+      try {
+        final long sz1 = size(o1);
+        final long sz2 = size(o2);
+        if (sz1 > sz2) {
+          return -1;
+        } else if (sz2 > sz1) {
+          return 1;
+        } else {
+          return o1.name.compareTo(o2.name);
+        }
+      } catch (IOException ioe) {
+        throw new RuntimeException(ioe);
+      }
+    }
+  }
+
+  private final Comparator<SegmentInfo> segmentByteSizeDescending = new SegmentByteSizeDescending();
+
+  protected static abstract class MergeScore {
+    abstract double getScore();
+    abstract String getExplanation();
+  }
+
+  @Override
+  public MergeSpecification findMerges(SegmentInfos infos) throws IOException {
+    if (verbose()) {
+      message("findMerges: " + infos.size() + " segments");
+    }
+    if (infos.size() == 0) {
+      return null;
+    }
+    final Collection<SegmentInfo> merging = writer.get().getMergingSegments();
+    final Collection<SegmentInfo> toBeMerged = new HashSet<SegmentInfo>();
+
+    final SegmentInfos infosSorted = new SegmentInfos();
+    infosSorted.addAll(infos);
+
+    Collections.sort(infosSorted, segmentByteSizeDescending);
+
+    // Compute total index bytes & print details about the index
+    long totIndexBytes = 0;
+    long minSegmentBytes = Long.MAX_VALUE;
+    for(SegmentInfo info : infosSorted) {
+      final long segBytes = size(info);
+      if (verbose()) {
+        String extra = merging.contains(info) ? " [merging]" : "";
+        if (segBytes >= maxMergedSegmentBytes/2.0) {
+          extra += " [skip: too large]";
+        } else if (segBytes < floorSegmentBytes) {
+          extra += " [floored]";
+        }
+        message(" seg=" + writer.get().segString(info) + " size=" + String.format("%.3f", segBytes/1024/1024.) + " MB" + extra);
+      }
+
+      minSegmentBytes = Math.min(segBytes, minSegmentBytes);
+      // Accum total byte size
+      totIndexBytes += segBytes;
+    }
+
+    // If we have too-large segments, grace them out
+    // of the maxSegmentCount:
+    int tooBigCount = 0;
+    while (tooBigCount < infosSorted.size() && size(infosSorted.info(tooBigCount)) >= maxMergedSegmentBytes/2.0) {
+      totIndexBytes -= size(infosSorted.get(tooBigCount));
+      tooBigCount++;
+    }
+
+    minSegmentBytes = floorSize(minSegmentBytes);
+
+    // Compute max allowed segs in the index
+    long levelSize = minSegmentBytes;
+    long bytesLeft = totIndexBytes;
+    double allowedSegCount = 0;
+    while(true) {
+      final double segCountLevel = bytesLeft / (double) levelSize;
+      if (segCountLevel < segsPerTier) {
+        allowedSegCount += Math.ceil(segCountLevel);
+        break;
+      }
+      allowedSegCount += segsPerTier;
+      bytesLeft -= segsPerTier * levelSize;
+      levelSize *= maxMergeAtOnce;
+    }
+    int allowedSegCountInt = (int) allowedSegCount;
+
+    MergeSpecification spec = null;
+
+    // Cycle to possibly select more than one merge:
+    while(true) {
+
+      long mergingBytes = 0;
+
+      // Gather eligible segments for merging, ie segments
+      // not already being merged and not already picked (by
+      // prior iteration of this loop) for merging:
+      final SegmentInfos eligible = new SegmentInfos();
+      for(int idx = tooBigCount; idx < infosSorted.size(); idx++) {
+        final SegmentInfo info = infosSorted.info(idx);
+        if (merging.contains(info)) {
+          mergingBytes += info.sizeInBytes(true);
+        } else if (!toBeMerged.contains(info)) {
+          eligible.add(info);
+        }
+      }
+
+      final boolean maxMergeIsRunning = mergingBytes >= maxMergedSegmentBytes;
+
+      message(" allowedSegmentCount=" + allowedSegCountInt + " vs count=" + infosSorted.size() + " (eligible count=" + eligible.size() + ") tooBigCount=" + tooBigCount);
+
+      if (eligible.size() == 0) {
+        return spec;
+      }
+
+      if (eligible.size() >= allowedSegCountInt) {
+
+        // OK we are over budget -- find best merge!
+        MergeScore bestScore = null;
+        SegmentInfos best = null;
+        boolean bestTooLarge = false;
+        long bestMergeBytes = 0;
+
+        // Consider all merge starts:
+        for(int startIdx = 0; startIdx <= eligible.size()-maxMergeAtOnce; startIdx++) {
+
+          long totAfterMergeBytes = 0;
+
+          final SegmentInfos candidate = new SegmentInfos();
+          boolean hitTooLarge = false;
+          for(int idx = startIdx; idx < startIdx + maxMergeAtOnce && idx < eligible.size(); idx++) {
+            final SegmentInfo info = eligible.info(idx);
+            final long segBytes = size(info);
+            if (totAfterMergeBytes + segBytes > maxMergedSegmentBytes) {
+              hitTooLarge = true;
+              // NOTE: we continue, so that we can try
+              // "packing" smaller segments into this merge
+              // to see if we can get closer to the max
+              // size; this in general is not perfect since
+              // this is really "bin packing" and we'd have
+              // to try different permutations.
+              continue;
+            }
+            candidate.add(info);
+            totAfterMergeBytes += segBytes;
+          }
+
+          final MergeScore score = score(candidate, hitTooLarge, mergingBytes);
+          message(" maybe=" + writer.get().segString(candidate) + " score=" + score.getScore() + " " + score.getExplanation() + " tooLarge=" + hitTooLarge + " size=" + String.format("%.3f MB", totAfterMergeBytes/1024./1024.));
+
+          // If we are already running a max sized merge
+          // (maxMergeIsRunning), don't allow another max
+          // sized merge to kick off:
+          if ((bestScore == null || score.getScore() < bestScore.getScore()) && (!hitTooLarge || !maxMergeIsRunning)) {
+            best = candidate;
+            bestScore = score;
+            bestTooLarge = hitTooLarge;
+            bestMergeBytes = totAfterMergeBytes;
+          }
+        }
+
+        if (best != null) {
+          if (spec == null) {
+            spec = new MergeSpecification();
+          }
+          final OneMerge merge = new OneMerge(best);
+          spec.add(merge);
+          for(SegmentInfo info : merge.segments) {
+            toBeMerged.add(info);
+          }
+
+          if (verbose()) {
+            message(" add merge=" + writer.get().segString(merge.segments) + " size=" + String.format("%.3f MB", bestMergeBytes/1024./1024.)
+ " score=" + String.format("%.3f", bestScore.getScore()) + " " + bestScore.getExplanation() + (bestTooLarge ? " [max merge]" : "")); + } + } else { + return spec; + } + } else { + return spec; + } + } + } + + /** Expert: scores one merge; subclasses can override. */ + protected MergeScore score(SegmentInfos candidate, boolean hitTooLarge, long mergingBytes) throws IOException { + long totBeforeMergeBytes = 0; + long totAfterMergeBytes = 0; + long totAfterMergeBytesFloored = 0; + for(SegmentInfo info : candidate) { + final long segBytes = size(info); + totAfterMergeBytes += segBytes; + totAfterMergeBytesFloored += floorSize(segBytes); + totBeforeMergeBytes += info.sizeInBytes(true); + } + + // Measure "skew" of the merge, which can range + // from 1.0/numSegsBeingMerged (good) to 1.0 + // (poor): + final double skew; + if (hitTooLarge) { + // Pretend the merge has perfect skew; skew doesn't + // matter in this case because this merge will not + // "cascade" and so it cannot lead to N^2 merge cost + // over time: + skew = 1.0/maxMergeAtOnce; + } else { + skew = ((double) floorSize(size(candidate.info(0))))/totAfterMergeBytesFloored; + } + + // Strongly favor merges with less skew (smaller + // mergeScore is better): + double mergeScore = skew; + + // Gently favor smaller merges over bigger ones. We + // don't want to make this exponent too large else we + // can end up doing poor merges of small segments in + // order to avoid the large merges: + mergeScore *= Math.pow(totAfterMergeBytes, 0.05); + + // Strongly favor merges that reclaim deletes: + final double nonDelRatio = ((double) totAfterMergeBytes)/totBeforeMergeBytes; + mergeScore *= nonDelRatio; + + final double finalMergeScore = mergeScore; + + return new MergeScore() { + + @Override + public double getScore() { + return finalMergeScore; + } + + @Override + public String getExplanation() { + return "skew=" + String.format("%.3f", skew) + " nonDelRatio=" + String.format("%.3f", nonDelRatio); + } + }; + } + + @Override + public MergeSpecification findMergesForOptimize(SegmentInfos infos, int maxSegmentCount, Set segmentsToOptimize) throws IOException { + if (verbose()) { + message("findMergesForOptimize maxSegmentCount=" + maxSegmentCount + " infos=" + writer.get().segString(infos) + " segmentsToOptimize=" + segmentsToOptimize); + } + SegmentInfos eligible = new SegmentInfos(); + boolean optimizeMergeRunning = false; + final Collection merging = writer.get().getMergingSegments(); + for(SegmentInfo info : infos) { + if (segmentsToOptimize.contains(info)) { + if (!merging.contains(info)) { + eligible.add(info); + } else { + optimizeMergeRunning = true; + } + } + } + + if (eligible.size() == 0) { + return null; + } + + if ((maxSegmentCount > 1 && eligible.size() <= maxSegmentCount) || + (maxSegmentCount == 1 && eligible.size() == 1 && isOptimized(eligible.get(0)))) { + if (verbose()) { + message("already optimized"); + } + return null; + } + + Collections.sort(eligible, segmentByteSizeDescending); + + if (verbose()) { + message("eligible=" + eligible); + message("optimizeMergeRunning=" + optimizeMergeRunning); + } + + int end = eligible.size(); + + MergeSpecification spec = null; + + // Do full merges, first, backwards: + while(end >= maxMergeAtOnceExplicit + maxSegmentCount - 1) { + if (spec == null) { + spec = new MergeSpecification(); + } + final OneMerge merge = new OneMerge(eligible.range(end-maxMergeAtOnceExplicit, end)); + if (verbose()) { + message("add merge=" + writer.get().segString(merge.segments)); + } + spec.add(merge); + 
end -= maxMergeAtOnceExplicit; + } + + if (spec == null && !optimizeMergeRunning) { + // Do final merge + final int numToMerge = end - maxSegmentCount + 1; + final OneMerge merge = new OneMerge(eligible.range(end-numToMerge, end)); + if (verbose()) { + message("add final merge=" + merge.segString(writer.get().getDirectory())); + } + spec = new MergeSpecification(); + spec.add(merge); + } + + return spec; + } + + @Override + public MergeSpecification findMergesToExpungeDeletes(SegmentInfos infos) + throws CorruptIndexException, IOException { + if (verbose()) { + message("findMergesToExpungeDeletes infos=" + writer.get().segString(infos) + " expungeDeletesPctAllowed=" + expungeDeletesPctAllowed); + } + final SegmentInfos eligible = new SegmentInfos(); + final Collection merging = writer.get().getMergingSegments(); + for(SegmentInfo info : infos) { + double pctDeletes = 100.*((double) writer.get().numDeletedDocs(info))/info.docCount; + if (pctDeletes > expungeDeletesPctAllowed && !merging.contains(info)) { + eligible.add(info); + } + } + + if (eligible.size() == 0) { + return null; + } + + Collections.sort(eligible, segmentByteSizeDescending); + + if (verbose()) { + message("eligible=" + eligible); + } + + int start = 0; + MergeSpecification spec = null; + + while(start < eligible.size()) { + long totAfterMergeBytes = 0; + int upto = start; + boolean done = false; + while(upto < start + maxMergeAtOnceExplicit) { + if (upto == eligible.size()) { + done = true; + break; + } + final SegmentInfo info = eligible.get(upto); + final long segBytes = size(info); + if (totAfterMergeBytes + segBytes > maxMergedSegmentBytes) { + // TODO: we could be smarter here, eg cherry + // picking smaller merges that'd sum up to just + // around the max size + break; + } + totAfterMergeBytes += segBytes; + upto++; + } + + if (upto == start) { + // Single segment is too big; grace it + start++; + continue; + } + + if (spec == null) { + spec = new MergeSpecification(); + } + + final OneMerge merge = new OneMerge(eligible.range(start, upto)); + if (verbose()) { + message("add merge=" + writer.get().segString(merge.segments)); + } + spec.add(merge); + start = upto; + if (done) { + break; + } + } + + return spec; + } + + @Override + public boolean useCompoundFile(SegmentInfos infos, SegmentInfo mergedInfo) throws IOException { + final boolean doCFS; + + if (!useCompoundFile) { + doCFS = false; + } else if (noCFSRatio == 1.0) { + doCFS = true; + } else { + long totalSize = 0; + for (SegmentInfo info : infos) + totalSize += size(info); + + doCFS = size(mergedInfo) <= noCFSRatio * totalSize; + } + return doCFS; + } + + @Override + public void close() { + } + + private boolean isOptimized(SegmentInfo info) + throws IOException { + IndexWriter w = writer.get(); + assert w != null; + boolean hasDeletions = w.numDeletedDocs(info) > 0; + return !hasDeletions && + !info.hasSeparateNorms() && + info.dir == w.getDirectory() && + (info.getUseCompoundFile() == useCompoundFile || noCFSRatio < 1.0); + } + + // Segment size in bytes, pro-rated by % deleted + private long size(SegmentInfo info) throws IOException { + final long byteSize = info.sizeInBytes(true); + final int delCount = writer.get().numDeletedDocs(info); + final double delRatio = (info.docCount <= 0 ? 
0.0f : ((double)delCount / (double)info.docCount)); + assert delRatio <= 1.0; + return (long) (byteSize * (1.0-delRatio)); + } + + private long floorSize(long bytes) { + return Math.max(floorSegmentBytes, bytes); + } + + private boolean verbose() { + IndexWriter w = writer.get(); + return w != null && w.verbose(); + } + + private void message(String message) { + if (verbose()) { + writer.get().message("TMP: " + message); + } + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder("[" + getClass().getSimpleName() + ": "); + sb.append("maxMergeAtOnce=").append(maxMergeAtOnce).append(", "); + sb.append("maxMergeAtOnceExplicit=").append(maxMergeAtOnceExplicit).append(", "); + sb.append("maxMergedSegmentMB=").append(maxMergedSegmentBytes/1024/1024.).append(", "); + sb.append("floorSegmentMB=").append(floorSegmentBytes/1024/1024.).append(", "); + sb.append("expungeDeletesPctAllowed=").append(expungeDeletesPctAllowed).append(", "); + sb.append("segmentsPerTier=").append(segsPerTier).append(", "); + sb.append("useCompoundFile=").append(useCompoundFile).append(", "); + sb.append("noCFSRatio=").append(noCFSRatio); + return sb.toString(); + } +} \ No newline at end of file Property changes on: lucene/src/java/org/apache/lucene/index/TieredMergePolicy.java ___________________________________________________________________ Added: svn:eol-style + native Index: lucene/src/java/org/apache/lucene/index/IndexWriter.java =================================================================== --- lucene/src/java/org/apache/lucene/index/IndexWriter.java (revision 1099031) +++ lucene/src/java/org/apache/lucene/index/IndexWriter.java (working copy) @@ -2618,6 +2618,18 @@ } } + /** Expert: to be used by a {@link MergePolicy} to avoid + * selecting merges for segments already being merged. + * The returned collection is not cloned, and thus is + * only safe to access if you hold IndexWriter's lock + * (which you do when IndexWriter invokes the + * MergePolicy). + * + *

+ *  Do not alter the returned collection! */
+  public synchronized Collection<SegmentInfo> getMergingSegments() {
+    return mergingSegments;
+  }
+
   /**
    * Expert: the {@link MergeScheduler} calls this method to retrieve the next
    * merge requested by the MergePolicy
@@ -3418,9 +3430,6 @@
       if (first + i >= numSegments || !segmentInfos.info(first+i).equals(info)) {
         if (segmentInfos.indexOf(info) == -1)
           throw new MergePolicy.MergeException("MergePolicy selected a segment (" + info.name + ") that is not in the current index " + segString(), directory);
-        else
-          throw new MergePolicy.MergeException("MergePolicy selected non-contiguous segments to merge (" + merge.segString(directory) + " vs " + segString() + "), which IndexWriter (currently) cannot handle",
-                                               directory);
       }
     }
@@ -4098,7 +4107,8 @@
     return segString(segmentInfos);
   }
 
-  private synchronized String segString(SegmentInfos infos) {
+  /** @lucene.internal */
+  public synchronized String segString(SegmentInfos infos) {
     StringBuilder buffer = new StringBuilder();
     final int count = infos.size();
     for(int i = 0; i < count; i++) {
@@ -4113,6 +4123,26 @@
     return buffer.toString();
   }
 
+  public synchronized String segString(SegmentInfo info) throws IOException {
+    StringBuilder buffer = new StringBuilder();
+    SegmentReader reader = readerPool.getIfExists(info);
+    try {
+      if (reader != null) {
+        buffer.append(reader.toString());
+      } else {
+        buffer.append(info.toString(directory, 0));
+        if (info.dir != directory) {
+          buffer.append("**");
+        }
+      }
+    } finally {
+      if (reader != null) {
+        readerPool.release(reader);
+      }
+    }
+    return buffer.toString();
+  }
+
   private synchronized void doWait() {
     // NOTE: the callers of this method should in theory
     // be able to do simply wait(), but, as a defense
Index: lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java
===================================================================
--- lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java (revision 1099031)
+++ lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java (working copy)
@@ -43,8 +43,10 @@
 import org.apache.lucene.index.LogByteSizeMergePolicy;
 import org.apache.lucene.index.LogDocMergePolicy;
 import org.apache.lucene.index.LogMergePolicy;
+import org.apache.lucene.index.MergePolicy;
 import org.apache.lucene.index.SerialMergeScheduler;
 import org.apache.lucene.index.SlowMultiReaderWrapper;
+import org.apache.lucene.index.TieredMergePolicy;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.FieldCache.CacheEntry;
 import org.apache.lucene.search.FieldCache;
@@ -375,6 +377,27 @@
     return getClass().getName() + "."
+ getName(); } + public static void setUseCompoundFile(MergePolicy mp, boolean useCompound) { + if (mp instanceof LogMergePolicy) { + ((LogMergePolicy) mp).setUseCompoundFile(useCompound); + } else if (mp instanceof TieredMergePolicy) { + ((TieredMergePolicy) mp).setUseCompoundFile(useCompound); + } else { + fail("MergePolicy (compound-file) not supported " + mp); + } + } + + public static void setMergeFactor(MergePolicy mp, int mergeFactor) { + if (mp instanceof LogMergePolicy) { + ((LogMergePolicy) mp).setMergeFactor(mergeFactor); + } else if (mp instanceof TieredMergePolicy) { + ((TieredMergePolicy) mp).setMaxMergeAtOnce(mergeFactor); + ((TieredMergePolicy) mp).setMaxMergeAtOnceExplicit(mergeFactor); + } else { + fail("MergePolicy not supported " + mp); + } + } + @After public void tearDown() throws Exception { assertTrue("ensure your setUp() calls super.setUp()!!!", setup); @@ -634,7 +657,10 @@ IndexWriterConfig c = new IndexWriterConfig(v, a); if (r.nextBoolean()) { c.setMergePolicy(new LogDocMergePolicy()); + } else if (r.nextBoolean()) { + c.setMergePolicy(newTieredMergePolicy()); } + if (r.nextBoolean()) { c.setMergeScheduler(new SerialMergeScheduler()); } @@ -672,6 +698,10 @@ return newLogMergePolicy(random); } + public static TieredMergePolicy newTieredMergePolicy() { + return newTieredMergePolicy(random); + } + public static LogMergePolicy newLogMergePolicy(Random r) { LogMergePolicy logmp = r.nextBoolean() ? new LogDocMergePolicy() : new LogByteSizeMergePolicy(); logmp.setUseCompoundFile(r.nextBoolean()); @@ -684,6 +714,24 @@ return logmp; } + public static TieredMergePolicy newTieredMergePolicy(Random r) { + TieredMergePolicy tmp = new TieredMergePolicy(); + if (r.nextInt(3) == 2) { + tmp.setMaxMergeAtOnce(2); + tmp.setMaxMergeAtOnceExplicit(2); + } else { + tmp.setMaxMergeAtOnce(_TestUtil.nextInt(r, 2, 20)); + tmp.setMaxMergeAtOnceExplicit(_TestUtil.nextInt(r, 2, 30)); + } + tmp.setMaxMergedSegmentMB(0.2 + r.nextDouble() * 2.0); + tmp.setFloorSegmentMB(0.2 + r.nextDouble() * 2.0); + tmp.setExpungeDeletesPctAllowed(0.0 + r.nextDouble() * 30.0); + tmp.setSegmentsPerTier(_TestUtil.nextInt(r, 2, 20)); + tmp.setUseCompoundFile(r.nextBoolean()); + tmp.setNoCFSRatio(0.1 + r.nextDouble()*0.8); + return tmp; + } + public static LogMergePolicy newLogMergePolicy(boolean useCFS) { LogMergePolicy logmp = newLogMergePolicy(); logmp.setUseCompoundFile(useCFS); Index: lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java =================================================================== --- lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java (revision 1099031) +++ lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java (working copy) @@ -38,7 +38,9 @@ import org.apache.lucene.index.ConcurrentMergeScheduler; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.LogMergePolicy; +import org.apache.lucene.index.MergePolicy; import org.apache.lucene.index.MergeScheduler; +import org.apache.lucene.index.TieredMergePolicy; import org.apache.lucene.store.Directory; public class _TestUtil { @@ -320,8 +322,14 @@ * count lowish */ public static void reduceOpenFiles(IndexWriter w) { // keep number of open files lowish - LogMergePolicy lmp = (LogMergePolicy) w.getMergePolicy(); - lmp.setMergeFactor(Math.min(5, lmp.getMergeFactor())); + MergePolicy mp = w.getConfig().getMergePolicy(); + if (mp instanceof LogMergePolicy) { + LogMergePolicy lmp = (LogMergePolicy) mp; + lmp.setMergeFactor(Math.min(5, lmp.getMergeFactor())); + } else if (mp instanceof 
TieredMergePolicy) { + TieredMergePolicy tmp = (TieredMergePolicy) mp; + tmp.setMaxMergeAtOnce(Math.min(5, tmp.getMaxMergeAtOnce())); + } MergeScheduler ms = w.getConfig().getMergeScheduler(); if (ms instanceof ConcurrentMergeScheduler) {
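For anyone trying the patch out, here is a minimal usage sketch (illustrative only, not part of the patch): it wires the new policy into an IndexWriterConfig and simply restates the defaults this patch ships with (maxMergeAtOnce=10, segmentsPerTier=10, maxMergedSegmentMB=5 GB). The analyzer and dir variables, and the Version constant, are placeholders for whatever your application already uses.

    // Illustrative sketch only -- not part of this patch.
    // "analyzer" and "dir" are placeholders; the setters restate the defaults above.
    TieredMergePolicy tmp = new TieredMergePolicy();
    tmp.setMaxMergeAtOnce(10);            // segments merged at once during normal merging
    tmp.setSegmentsPerTier(10.0);         // allowed segments per tier
    tmp.setMaxMergedSegmentMB(5*1024);    // ~5 GB cap for normally merged segments
    tmp.setUseCompoundFile(true);

    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_31, analyzer);
    iwc.setMergePolicy(tmp);
    IndexWriter writer = new IndexWriter(dir, iwc);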