diff --git oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/SegmentNodeStoreService.java oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/SegmentNodeStoreService.java index c7d2ddb..91c5793 100644 --- oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/SegmentNodeStoreService.java +++ oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/SegmentNodeStoreService.java @@ -26,12 +26,12 @@ import static org.apache.jackrabbit.oak.commons.PropertiesUtil.toLong; import static org.apache.jackrabbit.oak.osgi.OsgiUtil.lookupConfigurationThenFramework; import static org.apache.jackrabbit.oak.segment.SegmentNotFoundExceptionListener.IGNORE_SNFE; import static org.apache.jackrabbit.oak.segment.compaction.SegmentGCOptions.FORCE_TIMEOUT_DEFAULT; -import static org.apache.jackrabbit.oak.segment.compaction.SegmentGCOptions.GAIN_THRESHOLD_DEFAULT; import static org.apache.jackrabbit.oak.segment.compaction.SegmentGCOptions.MEMORY_THRESHOLD_DEFAULT; import static org.apache.jackrabbit.oak.segment.compaction.SegmentGCOptions.PAUSE_DEFAULT; import static org.apache.jackrabbit.oak.segment.compaction.SegmentGCOptions.RETAINED_GENERATIONS_DEFAULT; import static org.apache.jackrabbit.oak.segment.compaction.SegmentGCOptions.RETRY_COUNT_DEFAULT; import static org.apache.jackrabbit.oak.segment.compaction.SegmentGCOptions.SIZE_DELTA_ESTIMATION_DEFAULT; +import static org.apache.jackrabbit.oak.segment.compaction.SegmentGCOptions.DISABLE_ESTIMATION_DEFAULT; import static org.apache.jackrabbit.oak.segment.file.FileStoreBuilder.fileStoreBuilder; import static org.apache.jackrabbit.oak.spi.blob.osgi.SplitBlobStoreService.ONLY_STANDALONE_TARGET; import static org.apache.jackrabbit.oak.spi.whiteboard.WhiteboardUtils.registerMBean; @@ -188,14 +188,6 @@ public class SegmentNodeStoreService extends ProxyNodeStore public static final String NODE_DEDUPLICATION_CACHE_SIZE = "nodeDeduplicationCache.size"; @Property( - byteValue = GAIN_THRESHOLD_DEFAULT, - label = "Compaction gain threshold", - description = "TarMK compaction gain threshold. The gain estimation prevents compaction from running " + - "if the provided threshold is not met. Value represents a percentage so an input between 0 and 100 is expected." - ) - public static final String COMPACTION_GAIN_THRESHOLD = "compaction.gainThreshold"; - - @Property( boolValue = PAUSE_DEFAULT, label = "Pause Compaction", description = "When enabled compaction would not be performed" @@ -226,6 +218,13 @@ public class SegmentNodeStoreService extends ProxyNodeStore description = "Amount of increase in repository size that will trigger compaction (bytes)" ) public static final String COMPACTION_SIZE_DELTA_ESTIMATION = "compaction.sizeDeltaEstimation"; + + @Property( + boolValue = DISABLE_ESTIMATION_DEFAULT, + label = "Disable Compaction Estimation Phase", + description = "Disables compaction estimation phase, thus allowing compaction to run every time." + ) + public static final String COMPACTION_DISABLE_ESTIMATION = "compaction.disableEstimation"; @Property( intValue = RETAINED_GENERATIONS_DEFAULT, @@ -632,14 +631,20 @@ public class SegmentNodeStoreService extends ProxyNodeStore int forceTimeout = toInteger(property(COMPACTION_FORCE_TIMEOUT), FORCE_TIMEOUT_DEFAULT); int retainedGenerations = toInteger(property(RETAINED_GENERATIONS), RETAINED_GENERATIONS_DEFAULT); - byte gainThreshold = getGainThreshold(); long sizeDeltaEstimation = toLong(property(COMPACTION_SIZE_DELTA_ESTIMATION), SIZE_DELTA_ESTIMATION_DEFAULT); int memoryThreshold = toInteger(property(MEMORY_THRESHOLD), MEMORY_THRESHOLD_DEFAULT); + boolean disableEstimation = toBoolean(property(COMPACTION_DISABLE_ESTIMATION), DISABLE_ESTIMATION_DEFAULT); + + if (property("compaction.gainThreshold") != null) { + log.warn("Deprecated property compaction.gainThreshold was detected. In order to configure compaction please use the new property " + + "compaction.sizeDeltaEstimation. For turning off estimation, the new property compaction.disableEstimation should be used."); + } - return new SegmentGCOptions(pauseCompaction, gainThreshold, retryCount, forceTimeout) + return new SegmentGCOptions(pauseCompaction, retryCount, forceTimeout) .setRetainedGenerations(retainedGenerations) .setGcSizeDeltaEstimation(sizeDeltaEstimation) - .setMemoryThreshold(memoryThreshold); + .setMemoryThreshold(memoryThreshold) + .setEstimationDisabled(disableEstimation); } private void unregisterNodeStore() { @@ -732,16 +737,6 @@ public class SegmentNodeStoreService extends ProxyNodeStore return Integer.parseInt(getMaxFileSizeProperty()); } - private byte getGainThreshold() { - String gt = property(COMPACTION_GAIN_THRESHOLD); - - if (gt == null) { - return GAIN_THRESHOLD_DEFAULT; - } - - return Byte.valueOf(gt); - } - private String property(String name) { return lookupConfigurationThenFramework(context, name); } diff --git oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/compaction/SegmentGCOptions.java oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/compaction/SegmentGCOptions.java index 285b161..d2151e3 100644 --- oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/compaction/SegmentGCOptions.java +++ oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/compaction/SegmentGCOptions.java @@ -30,11 +30,11 @@ public class SegmentGCOptions { * Default value for {@link #isPaused()} */ public static final boolean PAUSE_DEFAULT = false; - + /** - * Default value for {@link #getGainThreshold()} + * Default value for {@link #isEstimationDisabled()} */ - public static final byte GAIN_THRESHOLD_DEFAULT = 10; + public static final boolean DISABLE_ESTIMATION_DEFAULT = false; /** * Default value for {@link #getRetryCount()} @@ -52,9 +52,9 @@ public class SegmentGCOptions { public static final int RETAINED_GENERATIONS_DEFAULT = 2; /** - * Default value for {@link #getGcSizeDeltaEstimation()} + * Default value for {@link #getGcSizeDeltaEstimation()} set to 10GB */ - public static final long SIZE_DELTA_ESTIMATION_DEFAULT = -1; + public static final long SIZE_DELTA_ESTIMATION_DEFAULT = 10737418240L; /** * Default value for {@link #getMemoryThreshold()} @@ -62,8 +62,11 @@ public class SegmentGCOptions { public static final int MEMORY_THRESHOLD_DEFAULT = 15; private boolean paused = PAUSE_DEFAULT; - - private int gainThreshold = GAIN_THRESHOLD_DEFAULT; + + /** + * Flag controlling whether the estimation phase will run before a GC cycle + */ + private boolean estimationDisabled = DISABLE_ESTIMATION_DEFAULT; private int retryCount = RETRY_COUNT_DEFAULT; @@ -86,15 +89,14 @@ public class SegmentGCOptions { "oak.segment.compaction.gcSizeDeltaEstimation", SIZE_DELTA_ESTIMATION_DEFAULT); - public SegmentGCOptions(boolean paused, int gainThreshold, int retryCount, int forceTimeout) { + public SegmentGCOptions(boolean paused, int retryCount, int forceTimeout) { this.paused = paused; - this.gainThreshold = gainThreshold; this.retryCount = retryCount; this.forceTimeout = forceTimeout; } public SegmentGCOptions() { - this(PAUSE_DEFAULT, GAIN_THRESHOLD_DEFAULT, RETRY_COUNT_DEFAULT, FORCE_TIMEOUT_DEFAULT); + this(PAUSE_DEFAULT, RETRY_COUNT_DEFAULT, FORCE_TIMEOUT_DEFAULT); } /** @@ -123,24 +125,6 @@ public class SegmentGCOptions { } /** - * Get the gain estimate threshold beyond which revision gc should run - * @return gainThreshold - */ - public int getGainThreshold() { - return gainThreshold; - } - - /** - * Set the revision gain estimate threshold beyond which revision gc should run - * @param gainThreshold - * @return this instance - */ - public SegmentGCOptions setGainThreshold(int gainThreshold) { - this.gainThreshold = gainThreshold; - return this; - } - - /** * Get the number of tries to compact concurrent commits on top of already * compacted commits * @return retry count @@ -215,12 +199,13 @@ public class SegmentGCOptions { return getClass().getSimpleName() + "{" + "offline=" + offline + ", retainedGenerations=" + retainedGenerations + + ", gcSizeDeltaEstimation=" + gcSizeDeltaEstimation + ", ocBinDeduplication=" + ocBinDeduplication + ", ocBinMaxSize=" + ocBinMaxSize + "}"; } else { return getClass().getSimpleName() + "{" + "paused=" + paused + - ", gainThreshold=" + gainThreshold + + ", estimationDisabled=" + estimationDisabled + ", retryCount=" + retryCount + ", forceTimeout=" + forceTimeout + ", retainedGenerations=" + retainedGenerations + @@ -288,10 +273,6 @@ public class SegmentGCOptions { return this.ocBinMaxSize; } - public boolean isGcSizeDeltaEstimation() { - return gcSizeDeltaEstimation >= 0; - } - public long getGcSizeDeltaEstimation() { return gcSizeDeltaEstimation; } @@ -323,4 +304,17 @@ public class SegmentGCOptions { this.memoryThreshold = memoryThreshold; return this; } + + public boolean isEstimationDisabled() { + return estimationDisabled; + } + + /** + * Disables the estimation phase, thus allowing GC to run every time. + * @return this instance + */ + public SegmentGCOptions setEstimationDisabled(boolean disabled) { + this.estimationDisabled = disabled; + return this; + } } diff --git oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/compaction/SegmentRevisionGC.java oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/compaction/SegmentRevisionGC.java index 8df74b0..36f1e44 100644 --- oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/compaction/SegmentRevisionGC.java +++ oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/compaction/SegmentRevisionGC.java @@ -43,18 +43,6 @@ public interface SegmentRevisionGC { void setPausedCompaction(boolean paused); /** - * Get the gain estimate threshold beyond which revision gc should run - * @return gainThreshold - */ - int getGainThreshold(); - - /** - * Set the revision gain estimate threshold beyond which revision gc should run - * @param gainThreshold - */ - void setGainThreshold(int gainThreshold); - - /** * Get the number of tries to compact concurrent commits on top of already * compacted commits * @return retry count @@ -106,6 +94,14 @@ public interface SegmentRevisionGC { long getGcSizeDeltaEstimation(); void setGcSizeDeltaEstimation(long gcSizeDeltaEstimation); + + boolean isEstimationDisabled(); + + /** + * Disables the estimation phase, thus allowing GC to run every time. + * @param disabled + */ + void setEstimationDisabled(boolean disabled); /** * Initiate a revision garbage collection operation diff --git oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/compaction/SegmentRevisionGCMBean.java oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/compaction/SegmentRevisionGCMBean.java index 9c0cd61..cfcdcce 100644 --- oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/compaction/SegmentRevisionGCMBean.java +++ oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/compaction/SegmentRevisionGCMBean.java @@ -64,16 +64,6 @@ public class SegmentRevisionGCMBean } @Override - public int getGainThreshold() { - return gcOptions.getGainThreshold(); - } - - @Override - public void setGainThreshold(int gainThreshold) { - gcOptions.setGainThreshold(gainThreshold); - } - - @Override public int getRetryCount() { return gcOptions.getRetryCount(); } @@ -113,6 +103,17 @@ public class SegmentRevisionGCMBean gcOptions.setGcSizeDeltaEstimation(gcSizeDeltaEstimation); } + + @Override + public boolean isEstimationDisabled() { + return gcOptions.isEstimationDisabled(); + } + + @Override + public void setEstimationDisabled(boolean disabled) { + gcOptions.setEstimationDisabled(disabled); + } + @Override public void startRevisionGC() { fileStore.getGCRunner().run(); diff --git oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/CompactionGainEstimate.java oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/CompactionGainEstimate.java deleted file mode 100644 index 57286b8..0000000 --- oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/CompactionGainEstimate.java +++ /dev/null @@ -1,180 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.jackrabbit.oak.segment.file; - -import static org.apache.jackrabbit.oak.api.Type.BINARIES; -import static org.apache.jackrabbit.oak.commons.IOUtils.humanReadableByteCount; - -import java.io.File; -import java.util.UUID; - -import com.google.common.base.Supplier; -import com.google.common.hash.BloomFilter; -import com.google.common.hash.Funnel; -import com.google.common.hash.PrimitiveSink; -import org.apache.jackrabbit.oak.api.Blob; -import org.apache.jackrabbit.oak.api.PropertyState; -import org.apache.jackrabbit.oak.segment.RecordIdSet; -import org.apache.jackrabbit.oak.segment.SegmentBlob; -import org.apache.jackrabbit.oak.segment.SegmentId; -import org.apache.jackrabbit.oak.segment.SegmentNodeState; -import org.apache.jackrabbit.oak.segment.SegmentPropertyState; -import org.apache.jackrabbit.oak.spi.state.ChildNodeEntry; - -class CompactionGainEstimate implements TarEntryVisitor, GCEstimation { - - private static final Funnel UUID_FUNNEL = new Funnel() { - @Override - public void funnel(UUID from, PrimitiveSink into) { - into.putLong(from.getMostSignificantBits()); - into.putLong(from.getLeastSignificantBits()); - } - }; - - private final BloomFilter uuids; - - private final int gainThreshold; - - private long totalSize = 0; - - private long reachableSize = 0; - - private boolean gcNeeded; - - private String gcInfo = "unknown"; - - private boolean finished = false; - - /** - * Create a new instance of gain estimator. The estimation process can be stopped - * by switching the supplier {@code stop} to {@code true}, in which case the returned - * estimates are undefined. - * - * @param node root node state - * @param estimatedBulkCount - * @param stop stop signal - */ - CompactionGainEstimate(SegmentNodeState node, int estimatedBulkCount, - Supplier stop, int gainThreshold) { - uuids = BloomFilter.create(UUID_FUNNEL, estimatedBulkCount); - this.gainThreshold = gainThreshold; - collectReferencedSegments(node, new RecordIdSet(), stop); - } - - private void collectReferencedSegments(SegmentNodeState node, RecordIdSet visited, Supplier stop) { - if (!stop.get() && visited.addIfNotPresent(node.getRecordId())) { - collectUUID(node.getRecordId().getSegmentId()); - for (PropertyState property : node.getProperties()) { - if (property instanceof SegmentPropertyState) { - collectUUID(((SegmentPropertyState) property) - .getRecordId().getSegmentId()); - } - - // Get the underlying value as stream so we can collect - // the segments ids involved in storing the value. - // This works as primitives are stored as strings and strings - // as binaries of their UTF-8 encoding. - for (Blob blob : property.getValue(BINARIES)) { - for (SegmentId id : SegmentBlob.getBulkSegmentIds(blob)) { - collectUUID(id); - } - } - } - for (ChildNodeEntry child : node.getChildNodeEntries()) { - collectReferencedSegments((SegmentNodeState) child.getNodeState(), - visited, stop); - } - } - } - - private void collectUUID(SegmentId segmentId) { - uuids.put(new UUID( - segmentId.getMostSignificantBits(), - segmentId.getLeastSignificantBits())); - } - - /** - * Returns a percentage estimate (scale 0-100) for how much disk space - * running compaction (and cleanup) could potentially release. - * - * @return percentage of disk space that could be freed with compaction - */ - public long estimateCompactionGain() { - if (totalSize == 0) { - return 0; - } - return 100 * (totalSize - reachableSize) / totalSize; - } - - private void run() { - if (finished) { - return; - } - long gain = estimateCompactionGain(); - gcNeeded = gain >= gainThreshold; - if (gcNeeded) { - gcInfo = String - .format("Gain is %s%% or %s/%s (%s/%s bytes), so running compaction", - gain, humanReadableByteCount(reachableSize), - humanReadableByteCount(totalSize), - reachableSize, totalSize); - } else { - if (totalSize == 0) { - gcInfo = "Skipping compaction for now as repository consists of a single tar file only"; - } else { - gcInfo = String - .format("Gain is %s%% or %s/%s (%s/%s bytes), so skipping compaction for now", - gain, - humanReadableByteCount(reachableSize), - humanReadableByteCount(totalSize), - reachableSize, totalSize); - } - } - finished = true; - } - - @Override - public boolean gcNeeded() { - if (!finished) { - run(); - } - return gcNeeded; - } - - @Override - public String gcLog() { - if (!finished) { - run(); - } - return gcInfo; - } - - // ---------------------------------------------------< TarEntryVisitor >-- - - @Override - public void visit(long msb, long lsb, File file, int offset, int size) { - UUID uuid = new UUID(msb, lsb); - int entrySize = TarReader.getEntrySize(size); - totalSize += entrySize; - if (uuids.mightContain(uuid)) { - reachableSize += entrySize; - } - } - -} diff --git oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/FileStore.java oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/FileStore.java index 5732698..df68321 100644 --- oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/FileStore.java +++ oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/FileStore.java @@ -747,11 +747,9 @@ public class FileStore extends AbstractFileStore { GCMemoryBarrier gcMemoryBarrier = new GCMemoryBarrier( sufficientMemory, gcListener, GC_COUNT.get(), gcOptions); - int gainThreshold = gcOptions.getGainThreshold(); boolean sufficientEstimatedGain = true; - if (gainThreshold <= 0) { - gcListener.info("TarMK GC #{}: estimation skipped because gain threshold value ({} <= 0)", - GC_COUNT, gainThreshold); + if (gcOptions.isEstimationDisabled()) { + gcListener.info("TarMK GC #{}: estimation skipped because it was explicitly disabled", GC_COUNT); } else if (gcOptions.isPaused()) { gcListener.info("TarMK GC #{}: estimation skipped because compaction is paused", GC_COUNT); } else { @@ -805,26 +803,9 @@ public class FileStore extends AbstractFileStore { * @return compaction gain estimate */ synchronized GCEstimation estimateCompactionGain(Supplier stop) { - if (gcOptions.isGcSizeDeltaEstimation()) { SizeDeltaGcEstimation e = new SizeDeltaGcEstimation(gcOptions, gcJournal, stats.getApproximateSize()); return e; - } - - CompactionGainEstimate estimate = new CompactionGainEstimate(getHead(), - count(), stop, gcOptions.getGainThreshold()); - fileStoreLock.readLock().lock(); - try { - for (TarReader reader : readers) { - reader.accept(estimate); - if (stop.get()) { - break; - } - } - } finally { - fileStoreLock.readLock().unlock(); - } - return estimate; } private void logAndClear( diff --git oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/SegmentCompactionIT.java oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/SegmentCompactionIT.java index 0bcafb0..78d3c8c 100644 --- oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/SegmentCompactionIT.java +++ oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/SegmentCompactionIT.java @@ -221,7 +221,7 @@ public class SegmentCompactionIT { ScheduledExecutorService executor = Executors.newSingleThreadScheduledExecutor(); SegmentGCOptions gcOptions = defaultGCOptions() - .setGainThreshold(0) + .setEstimationDisabled(true) .setForceTimeout(3600); fileStore = fileStoreBuilder(folder.getRoot()) .withMemoryMapping(true) diff --git oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/file/CompactionEstimatorTest.java oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/file/CompactionEstimatorTest.java index 47bbd7b..4a4925f 100644 --- oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/file/CompactionEstimatorTest.java +++ oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/file/CompactionEstimatorTest.java @@ -81,10 +81,6 @@ public class CompactionEstimatorTest { try { GCEstimation est = fileStore.estimateCompactionGain(); assertTrue(est.gcNeeded()); - if (est instanceof CompactionGainEstimate) { - // should be at 66% - assertTrue(((CompactionGainEstimate) est).estimateCompactionGain() > 60); - } } finally { fileStore.close(); }