diff --git oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/SegmentNodeStoreService.java oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/SegmentNodeStoreService.java index 8d1386d..4de2ce9 100644 --- oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/SegmentNodeStoreService.java +++ oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/SegmentNodeStoreService.java @@ -26,7 +26,6 @@ import static org.apache.jackrabbit.oak.commons.PropertiesUtil.toLong; import static org.apache.jackrabbit.oak.osgi.OsgiUtil.lookupConfigurationThenFramework; import static org.apache.jackrabbit.oak.segment.SegmentNotFoundExceptionListener.IGNORE_SNFE; import static org.apache.jackrabbit.oak.segment.compaction.SegmentGCOptions.FORCE_TIMEOUT_DEFAULT; -import static org.apache.jackrabbit.oak.segment.compaction.SegmentGCOptions.GAIN_THRESHOLD_DEFAULT; import static org.apache.jackrabbit.oak.segment.compaction.SegmentGCOptions.PAUSE_DEFAULT; import static org.apache.jackrabbit.oak.segment.compaction.SegmentGCOptions.RETAINED_GENERATIONS_DEFAULT; import static org.apache.jackrabbit.oak.segment.compaction.SegmentGCOptions.RETRY_COUNT_DEFAULT; @@ -187,14 +186,6 @@ public class SegmentNodeStoreService extends ProxyNodeStore public static final String NODE_DEDUPLICATION_CACHE_SIZE = "nodeDeduplicationCache.size"; @Property( - byteValue = GAIN_THRESHOLD_DEFAULT, - label = "Compaction gain threshold", - description = "TarMK compaction gain threshold. The gain estimation prevents compaction from running " + - "if the provided threshold is not met. Value represents a percentage so an input between 0 and 100 is expected." - ) - public static final String COMPACTION_GAIN_THRESHOLD = "compaction.gainThreshold"; - - @Property( boolValue = PAUSE_DEFAULT, label = "Pause Compaction", description = "When enabled compaction would not be performed" @@ -622,10 +613,9 @@ public class SegmentNodeStoreService extends ProxyNodeStore int forceTimeout = toInteger(property(COMPACTION_FORCE_TIMEOUT), FORCE_TIMEOUT_DEFAULT); int retainedGenerations = toInteger(property(RETAINED_GENERATIONS), RETAINED_GENERATIONS_DEFAULT); - byte gainThreshold = getGainThreshold(); long sizeDeltaEstimation = toLong(property(COMPACTION_SIZE_DELTA_ESTIMATION), SIZE_DELTA_ESTIMATION_DEFAULT); - return new SegmentGCOptions(pauseCompaction, gainThreshold, retryCount, forceTimeout) + return new SegmentGCOptions(pauseCompaction, retryCount, forceTimeout) .setRetainedGenerations(retainedGenerations) .setGcSizeDeltaEstimation(sizeDeltaEstimation); } @@ -720,16 +710,6 @@ public class SegmentNodeStoreService extends ProxyNodeStore return Integer.parseInt(getMaxFileSizeProperty()); } - private byte getGainThreshold() { - String gt = property(COMPACTION_GAIN_THRESHOLD); - - if (gt == null) { - return GAIN_THRESHOLD_DEFAULT; - } - - return Byte.valueOf(gt); - } - private String property(String name) { return lookupConfigurationThenFramework(context, name); } diff --git oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/compaction/SegmentGCOptions.java oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/compaction/SegmentGCOptions.java index 375740a..3922c50 100644 --- oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/compaction/SegmentGCOptions.java +++ oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/compaction/SegmentGCOptions.java @@ -30,11 +30,11 @@ public class SegmentGCOptions { * Default value for {@link #isPaused()} */ public static final boolean PAUSE_DEFAULT = false; - + /** - * Default value for {@link #getGainThreshold()} + * Default value for {@link #isEstimationDisabled()} */ - public static final byte GAIN_THRESHOLD_DEFAULT = 10; + public static final boolean ESTIMATION_DISABLED_DEFAULT = false; /** * Default value for {@link #getRetryCount()} @@ -52,13 +52,16 @@ public class SegmentGCOptions { public static final int RETAINED_GENERATIONS_DEFAULT = 2; /** - * Default value for {@link #getGcSizeDeltaEstimation()} + * Default value for {@link #getGcSizeDeltaEstimation()} set to 10GB */ - public static final long SIZE_DELTA_ESTIMATION_DEFAULT = -1; + public static final long SIZE_DELTA_ESTIMATION_DEFAULT = 10737418240L; private boolean paused = PAUSE_DEFAULT; - - private int gainThreshold = GAIN_THRESHOLD_DEFAULT; + + /** + * Flag controlling whether the estimation phase will run before a GC cycle + */ + private boolean estimationDisabled = ESTIMATION_DISABLED_DEFAULT; private int retryCount = RETRY_COUNT_DEFAULT; @@ -79,15 +82,14 @@ public class SegmentGCOptions { "oak.segment.compaction.gcSizeDeltaEstimation", SIZE_DELTA_ESTIMATION_DEFAULT); - public SegmentGCOptions(boolean paused, int gainThreshold, int retryCount, int forceTimeout) { + public SegmentGCOptions(boolean paused, int retryCount, int forceTimeout) { this.paused = paused; - this.gainThreshold = gainThreshold; this.retryCount = retryCount; this.forceTimeout = forceTimeout; } public SegmentGCOptions() { - this(PAUSE_DEFAULT, GAIN_THRESHOLD_DEFAULT, RETRY_COUNT_DEFAULT, FORCE_TIMEOUT_DEFAULT); + this(PAUSE_DEFAULT, RETRY_COUNT_DEFAULT, FORCE_TIMEOUT_DEFAULT); } /** @@ -116,24 +118,6 @@ public class SegmentGCOptions { } /** - * Get the gain estimate threshold beyond which revision gc should run - * @return gainThreshold - */ - public int getGainThreshold() { - return gainThreshold; - } - - /** - * Set the revision gain estimate threshold beyond which revision gc should run - * @param gainThreshold - * @return this instance - */ - public SegmentGCOptions setGainThreshold(int gainThreshold) { - this.gainThreshold = gainThreshold; - return this; - } - - /** * Get the number of tries to compact concurrent commits on top of already * compacted commits * @return retry count @@ -213,7 +197,7 @@ public class SegmentGCOptions { } else { return getClass().getSimpleName() + "{" + "paused=" + paused + - ", gainThreshold=" + gainThreshold + + ", estimationDisabled=" + estimationDisabled + ", retryCount=" + retryCount + ", forceTimeout=" + forceTimeout + ", retainedGenerations=" + retainedGenerations + @@ -281,10 +265,6 @@ public class SegmentGCOptions { return this.ocBinMaxSize; } - public boolean isGcSizeDeltaEstimation() { - return gcSizeDeltaEstimation >= 0; - } - public long getGcSizeDeltaEstimation() { return gcSizeDeltaEstimation; } @@ -294,4 +274,16 @@ public class SegmentGCOptions { return this; } + public boolean isEstimationDisabled() { + return estimationDisabled; + } + + /** + * Disables the estimation phase, thus allowing GC to run every time. + * @return this instance + */ + public SegmentGCOptions disableEstimation() { + this.estimationDisabled = true; + return this; + } } diff --git oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/compaction/SegmentRevisionGC.java oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/compaction/SegmentRevisionGC.java index d01a83a..285705a 100644 --- oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/compaction/SegmentRevisionGC.java +++ oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/compaction/SegmentRevisionGC.java @@ -43,18 +43,6 @@ public interface SegmentRevisionGC { void setPausedCompaction(boolean paused); /** - * Get the gain estimate threshold beyond which revision gc should run - * @return gainThreshold - */ - int getGainThreshold(); - - /** - * Set the revision gain estimate threshold beyond which revision gc should run - * @param gainThreshold - */ - void setGainThreshold(int gainThreshold); - - /** * Get the number of tries to compact concurrent commits on top of already * compacted commits * @return retry count diff --git oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/compaction/SegmentRevisionGCMBean.java oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/compaction/SegmentRevisionGCMBean.java index d9617f9..616b8b4 100644 --- oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/compaction/SegmentRevisionGCMBean.java +++ oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/compaction/SegmentRevisionGCMBean.java @@ -28,7 +28,6 @@ import org.apache.jackrabbit.oak.commons.jmx.AnnotatedStandardMBean; import org.apache.jackrabbit.oak.segment.file.FileStore; import org.apache.jackrabbit.oak.segment.file.FileStoreGCMonitor; -// FIXME OAK-4617: Align SegmentRevisionGC MBean with new generation based GC public class SegmentRevisionGCMBean extends AnnotatedStandardMBean implements SegmentRevisionGC { @@ -65,16 +64,6 @@ public class SegmentRevisionGCMBean } @Override - public int getGainThreshold() { - return gcOptions.getGainThreshold(); - } - - @Override - public void setGainThreshold(int gainThreshold) { - gcOptions.setGainThreshold(gainThreshold); - } - - @Override public int getRetryCount() { return gcOptions.getRetryCount(); } diff --git oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/CompactionGainEstimate.java oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/CompactionGainEstimate.java deleted file mode 100644 index 57286b8..0000000 --- oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/CompactionGainEstimate.java +++ /dev/null @@ -1,180 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.jackrabbit.oak.segment.file; - -import static org.apache.jackrabbit.oak.api.Type.BINARIES; -import static org.apache.jackrabbit.oak.commons.IOUtils.humanReadableByteCount; - -import java.io.File; -import java.util.UUID; - -import com.google.common.base.Supplier; -import com.google.common.hash.BloomFilter; -import com.google.common.hash.Funnel; -import com.google.common.hash.PrimitiveSink; -import org.apache.jackrabbit.oak.api.Blob; -import org.apache.jackrabbit.oak.api.PropertyState; -import org.apache.jackrabbit.oak.segment.RecordIdSet; -import org.apache.jackrabbit.oak.segment.SegmentBlob; -import org.apache.jackrabbit.oak.segment.SegmentId; -import org.apache.jackrabbit.oak.segment.SegmentNodeState; -import org.apache.jackrabbit.oak.segment.SegmentPropertyState; -import org.apache.jackrabbit.oak.spi.state.ChildNodeEntry; - -class CompactionGainEstimate implements TarEntryVisitor, GCEstimation { - - private static final Funnel UUID_FUNNEL = new Funnel() { - @Override - public void funnel(UUID from, PrimitiveSink into) { - into.putLong(from.getMostSignificantBits()); - into.putLong(from.getLeastSignificantBits()); - } - }; - - private final BloomFilter uuids; - - private final int gainThreshold; - - private long totalSize = 0; - - private long reachableSize = 0; - - private boolean gcNeeded; - - private String gcInfo = "unknown"; - - private boolean finished = false; - - /** - * Create a new instance of gain estimator. The estimation process can be stopped - * by switching the supplier {@code stop} to {@code true}, in which case the returned - * estimates are undefined. - * - * @param node root node state - * @param estimatedBulkCount - * @param stop stop signal - */ - CompactionGainEstimate(SegmentNodeState node, int estimatedBulkCount, - Supplier stop, int gainThreshold) { - uuids = BloomFilter.create(UUID_FUNNEL, estimatedBulkCount); - this.gainThreshold = gainThreshold; - collectReferencedSegments(node, new RecordIdSet(), stop); - } - - private void collectReferencedSegments(SegmentNodeState node, RecordIdSet visited, Supplier stop) { - if (!stop.get() && visited.addIfNotPresent(node.getRecordId())) { - collectUUID(node.getRecordId().getSegmentId()); - for (PropertyState property : node.getProperties()) { - if (property instanceof SegmentPropertyState) { - collectUUID(((SegmentPropertyState) property) - .getRecordId().getSegmentId()); - } - - // Get the underlying value as stream so we can collect - // the segments ids involved in storing the value. - // This works as primitives are stored as strings and strings - // as binaries of their UTF-8 encoding. - for (Blob blob : property.getValue(BINARIES)) { - for (SegmentId id : SegmentBlob.getBulkSegmentIds(blob)) { - collectUUID(id); - } - } - } - for (ChildNodeEntry child : node.getChildNodeEntries()) { - collectReferencedSegments((SegmentNodeState) child.getNodeState(), - visited, stop); - } - } - } - - private void collectUUID(SegmentId segmentId) { - uuids.put(new UUID( - segmentId.getMostSignificantBits(), - segmentId.getLeastSignificantBits())); - } - - /** - * Returns a percentage estimate (scale 0-100) for how much disk space - * running compaction (and cleanup) could potentially release. - * - * @return percentage of disk space that could be freed with compaction - */ - public long estimateCompactionGain() { - if (totalSize == 0) { - return 0; - } - return 100 * (totalSize - reachableSize) / totalSize; - } - - private void run() { - if (finished) { - return; - } - long gain = estimateCompactionGain(); - gcNeeded = gain >= gainThreshold; - if (gcNeeded) { - gcInfo = String - .format("Gain is %s%% or %s/%s (%s/%s bytes), so running compaction", - gain, humanReadableByteCount(reachableSize), - humanReadableByteCount(totalSize), - reachableSize, totalSize); - } else { - if (totalSize == 0) { - gcInfo = "Skipping compaction for now as repository consists of a single tar file only"; - } else { - gcInfo = String - .format("Gain is %s%% or %s/%s (%s/%s bytes), so skipping compaction for now", - gain, - humanReadableByteCount(reachableSize), - humanReadableByteCount(totalSize), - reachableSize, totalSize); - } - } - finished = true; - } - - @Override - public boolean gcNeeded() { - if (!finished) { - run(); - } - return gcNeeded; - } - - @Override - public String gcLog() { - if (!finished) { - run(); - } - return gcInfo; - } - - // ---------------------------------------------------< TarEntryVisitor >-- - - @Override - public void visit(long msb, long lsb, File file, int offset, int size) { - UUID uuid = new UUID(msb, lsb); - int entrySize = TarReader.getEntrySize(size); - totalSize += entrySize; - if (uuids.mightContain(uuid)) { - reachableSize += entrySize; - } - } - -} diff --git oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/FileStore.java oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/FileStore.java index 6da8aa6..f1c1f89 100644 --- oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/FileStore.java +++ oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/FileStore.java @@ -737,11 +737,9 @@ public class FileStore extends AbstractFileStore { gcListener.info("TarMK GC #{}: started", GC_COUNT.incrementAndGet()); Stopwatch watch = Stopwatch.createStarted(); - int gainThreshold = gcOptions.getGainThreshold(); boolean sufficientEstimatedGain = true; - if (gainThreshold <= 0) { - gcListener.info("TarMK GC #{}: estimation skipped because gain threshold value ({} <= 0)", - GC_COUNT, gainThreshold); + if (gcOptions.isEstimationDisabled()) { + gcListener.info("TarMK GC #{}: estimation skipped because it was explicitly disabled", GC_COUNT); } else if (gcOptions.isPaused()) { gcListener.info("TarMK GC #{}: estimation skipped because compaction is paused", GC_COUNT); } else { @@ -792,26 +790,9 @@ public class FileStore extends AbstractFileStore { * @return compaction gain estimate */ synchronized GCEstimation estimateCompactionGain(Supplier stop) { - if (gcOptions.isGcSizeDeltaEstimation()) { SizeDeltaGcEstimation e = new SizeDeltaGcEstimation(gcOptions, gcJournal, stats.getApproximateSize()); return e; - } - - CompactionGainEstimate estimate = new CompactionGainEstimate(getHead(), - count(), stop, gcOptions.getGainThreshold()); - fileStoreLock.readLock().lock(); - try { - for (TarReader reader : readers) { - reader.accept(estimate); - if (stop.get()) { - break; - } - } - } finally { - fileStoreLock.readLock().unlock(); - } - return estimate; } private void logAndClear( diff --git oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/SegmentCompactionIT.java oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/SegmentCompactionIT.java index 0bcafb0..78e6d48 100644 --- oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/SegmentCompactionIT.java +++ oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/SegmentCompactionIT.java @@ -221,7 +221,6 @@ public class SegmentCompactionIT { ScheduledExecutorService executor = Executors.newSingleThreadScheduledExecutor(); SegmentGCOptions gcOptions = defaultGCOptions() - .setGainThreshold(0) .setForceTimeout(3600); fileStore = fileStoreBuilder(folder.getRoot()) .withMemoryMapping(true) diff --git oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/file/CompactionEstimatorTest.java oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/file/CompactionEstimatorTest.java index 47bbd7b..4a4925f 100644 --- oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/file/CompactionEstimatorTest.java +++ oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/file/CompactionEstimatorTest.java @@ -81,10 +81,6 @@ public class CompactionEstimatorTest { try { GCEstimation est = fileStore.estimateCompactionGain(); assertTrue(est.gcNeeded()); - if (est instanceof CompactionGainEstimate) { - // should be at 66% - assertTrue(((CompactionGainEstimate) est).estimateCompactionGain() > 60); - } } finally { fileStore.close(); }