Index: oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/compaction/SegmentGCOptions.java =================================================================== --- oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/compaction/SegmentGCOptions.java (revision 1755586) +++ oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/compaction/SegmentGCOptions.java (working copy) @@ -84,6 +84,9 @@ "oak.segment.compaction.binaryDeduplicationMaxSize", 100 * 1024 * 1024); + private long gcSizeDeltaEstimation = Long.getLong( + "oak.segment.compaction.gcSizeDeltaEstimation", -1); + public SegmentGCOptions(boolean paused, int memoryThreshold, int gainThreshold, int retryCount, boolean forceAfterFail, int lockWaitTime) { this.paused = paused; @@ -263,7 +266,8 @@ ", retryCount=" + retryCount + ", forceAfterFail=" + forceAfterFail + ", lockWaitTime=" + lockWaitTime + - ", retainedGenerations=" + retainedGenerations + "}"; + ", retainedGenerations=" + retainedGenerations + + ", gcSizeDeltaEstimation=" + gcSizeDeltaEstimation + "}"; } } @@ -326,4 +330,17 @@ public long getBinaryDeduplicationMaxSize() { return this.ocBinMaxSize; } + + public boolean isGcSizeDeltaEstimation() { + return gcSizeDeltaEstimation >= 0; + } + + public long getGcSizeDeltaEstimation() { + return gcSizeDeltaEstimation; + } + + public SegmentGCOptions setGcSizeDeltaEstimation(long gcSizeDeltaEstimation) { + this.gcSizeDeltaEstimation = gcSizeDeltaEstimation; + return this; + } } Index: oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/compaction/SegmentRevisionGC.java =================================================================== --- oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/compaction/SegmentRevisionGC.java (revision 1755586) +++ oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/compaction/SegmentRevisionGC.java (working copy) @@ -119,4 +119,8 @@ */ void setRetainedGenerations(int retainedGenerations); + long 
getGcSizeDeltaEstimation(); + + void setGcSizeDeltaEstimation(long gcSizeDeltaEstimation); + } Index: oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/compaction/SegmentRevisionGCMBean.java =================================================================== --- oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/compaction/SegmentRevisionGCMBean.java (revision 1755586) +++ oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/compaction/SegmentRevisionGCMBean.java (working copy) @@ -102,4 +102,15 @@ public void setRetainedGenerations(int retainedGenerations) { gcOptions.setRetainedGenerations(retainedGenerations); } + + @Override + public long getGcSizeDeltaEstimation() { + return gcOptions.getGcSizeDeltaEstimation(); + } + + @Override + public void setGcSizeDeltaEstimation(long gcSizeDeltaEstimation) { + gcOptions.setGcSizeDeltaEstimation(gcSizeDeltaEstimation); + } + } Index: oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/CompactionGainEstimate.java =================================================================== --- oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/CompactionGainEstimate.java (revision 1755586) +++ oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/CompactionGainEstimate.java (working copy) @@ -19,6 +19,7 @@ package org.apache.jackrabbit.oak.segment.file; import static org.apache.jackrabbit.oak.api.Type.BINARIES; +import static org.apache.jackrabbit.oak.commons.IOUtils.humanReadableByteCount; import java.io.File; import java.util.UUID; @@ -36,7 +37,7 @@ import org.apache.jackrabbit.oak.segment.SegmentPropertyState; import org.apache.jackrabbit.oak.spi.state.ChildNodeEntry; -class CompactionGainEstimate implements TarEntryVisitor { +class CompactionGainEstimate implements TarEntryVisitor, GCEstimation { private static final Funnel<UUID> UUID_FUNNEL = new Funnel<UUID>() { @Override @@ -48,10 +49,18 @@ private final BloomFilter<UUID> uuids; + private final int 
gainThreshold; + private long totalSize = 0; private long reachableSize = 0; + private boolean gcNeeded; + + private String gcInfo = "unknown"; + + private boolean finished = false; + /** * Create a new instance of gain estimator. The estimation process can be stopped * by switching the supplier {@code stop} to {@code true}, in which case the returned @@ -61,8 +70,10 @@ * @param estimatedBulkCount * @param stop stop signal */ - CompactionGainEstimate(SegmentNodeState node, int estimatedBulkCount, Supplier<Boolean> stop) { + CompactionGainEstimate(SegmentNodeState node, int estimatedBulkCount, + Supplier<Boolean> stop, int gainThreshold) { uuids = BloomFilter.create(UUID_FUNNEL, estimatedBulkCount); + this.gainThreshold = gainThreshold; collectReferencedSegments(node, new RecordIdSet(), stop); } @@ -111,12 +122,47 @@ return 100 * (totalSize - reachableSize) / totalSize; } - public long getTotalSize() { - return totalSize; + private void run() { + if (finished) { + return; + } + long gain = estimateCompactionGain(); + gcNeeded = gain >= gainThreshold; + if (gcNeeded) { + gcInfo = String + .format("Gain is %s%% or %s/%s (%s/%s bytes), so running compaction", + gain, humanReadableByteCount(reachableSize), + humanReadableByteCount(totalSize), + reachableSize, totalSize); + } else { + if (totalSize == 0) { + gcInfo = "Skipping compaction for now as repository consists of a single tar file only"; + } else { + gcInfo = String + .format("Gain is %s%% or %s/%s (%s/%s bytes), so skipping compaction for now", + gain, + humanReadableByteCount(reachableSize), + humanReadableByteCount(totalSize), + reachableSize, totalSize); + } + } + finished = true; } - public long getReachableSize() { - return reachableSize; + @Override + public boolean gcNeeded() { + if (!finished) { + run(); + } + return gcNeeded; + } + + @Override + public String gcLog() { + if (!finished) { + run(); + } + return gcInfo; } // ---------------------------------------------------< TarEntryVisitor >-- Index: 
oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/FileStore.java =================================================================== --- oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/FileStore.java (revision 1755586) +++ oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/FileStore.java (working copy) @@ -184,6 +184,8 @@ private final SegmentGCOptions gcOptions; + private final GCJournal gcJournal; + /** * Flag to request revision cleanup during the next flush. */ @@ -269,6 +271,9 @@ this.directory = builder.getDirectory(); this.maxFileSize = builder.getMaxFileSize() * MB; this.memoryMapping = builder.getMemoryMapping(); + + this.gcJournal = new GCJournal(directory, this); + builder.withGCMonitor(gcJournal); this.gcListener = builder.getGcListener(); this.gcOptions = builder.getGcOptions(); @@ -460,34 +465,21 @@ } else { gcListener.info("TarMK GC #{}: estimation started", GC_COUNT); Supplier<Boolean> shutdown = newShutdownSignal(); - CompactionGainEstimate estimate = estimateCompactionGain(shutdown); + GCEstimation estimate = estimateCompactionGain(shutdown); if (shutdown.get()) { gcListener.info("TarMK GC #{}: estimation interrupted. Skipping compaction.", GC_COUNT); } - long gain = estimate.estimateCompactionGain(); - sufficientEstimatedGain = gain >= gainThreshold; + sufficientEstimatedGain = estimate.gcNeeded(); + String gcLog = estimate.gcLog(); if (sufficientEstimatedGain) { gcListener.info( - "TarMK GC #{}: estimation completed in {} ({} ms). " + - "Gain is {}% or {}/{} ({}/{} bytes), so running compaction", - GC_COUNT, watch, watch.elapsed(MILLISECONDS), gain, - humanReadableByteCount(estimate.getReachableSize()), humanReadableByteCount(estimate.getTotalSize()), - estimate.getReachableSize(), estimate.getTotalSize()); + "TarMK GC #{}: estimation completed in {} ({} ms). 
{}", + GC_COUNT, watch, watch.elapsed(MILLISECONDS), gcLog); } else { - if (estimate.getTotalSize() == 0) { - gcListener.skipped( - "TarMK GC #{}: estimation completed in {} ({} ms). " + - "Skipping compaction for now as repository consists of a single tar file only", - GC_COUNT, watch, watch.elapsed(MILLISECONDS)); - } else { - gcListener.skipped( - "TarMK GC #{}: estimation completed in {} ({} ms). " + - "Gain is {}% or {}/{} ({}/{} bytes), so skipping compaction for now", - GC_COUNT, watch, watch.elapsed(MILLISECONDS), gain, - humanReadableByteCount(estimate.getReachableSize()), humanReadableByteCount(estimate.getTotalSize()), - estimate.getReachableSize(), estimate.getTotalSize()); - } + gcListener.skipped( + "TarMK GC #{}: estimation completed in {} ({} ms). {}", + GC_COUNT, watch, watch.elapsed(MILLISECONDS), gcLog); } } @@ -660,8 +652,15 @@ * @param stop signal for stopping the estimation process. * @return compaction gain estimate */ - CompactionGainEstimate estimateCompactionGain(Supplier<Boolean> stop) { - CompactionGainEstimate estimate = new CompactionGainEstimate(getHead(), count(), stop); + GCEstimation estimateCompactionGain(Supplier<Boolean> stop) { + if (gcOptions.isGcSizeDeltaEstimation()) { + SizeDeltaGcEstimation e = new SizeDeltaGcEstimation(gcOptions, + gcJournal, stats.getApproximateSize()); + return e; + } + + CompactionGainEstimate estimate = new CompactionGainEstimate(getHead(), + count(), stop, gcOptions.getGainThreshold()); fileStoreLock.readLock().lock(); try { for (TarReader reader : readers) { Index: oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/GCEstimation.java =================================================================== --- oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/GCEstimation.java (revision 0) +++ oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/GCEstimation.java (revision 0) @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more 
contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.jackrabbit.oak.segment.file; + +public interface GCEstimation { + + /** + * Determines if the GC operation needs to run or not + */ + boolean gcNeeded(); + + /** + * User friendly message explaining the value of the + * {@link GCEstimation#gcNeeded()} flag + */ + String gcLog(); + +} Index: oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/GCJournal.java =================================================================== --- oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/GCJournal.java (revision 0) +++ oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/GCJournal.java (revision 0) @@ -0,0 +1,203 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.jackrabbit.oak.segment.file; + +import static com.google.common.base.Charsets.UTF_8; +import static com.google.common.base.Preconditions.checkNotNull; +import static java.nio.file.Files.newBufferedWriter; +import static java.nio.file.Files.readAllLines; +import static java.nio.file.StandardOpenOption.APPEND; +import static java.nio.file.StandardOpenOption.CREATE; +import static java.nio.file.StandardOpenOption.DSYNC; +import static java.nio.file.StandardOpenOption.WRITE; + +import java.io.BufferedWriter; +import java.io.File; +import java.io.IOException; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; + +import javax.annotation.Nonnull; + +import org.apache.jackrabbit.oak.spi.gc.GCMonitor; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Persists the repository meta following a cleanup operation in the + * {@link #GC_JOURNAL gc journal} file under the format: 'record id, size, + * timestamp'. 
+ */ +public class GCJournal extends GCMonitor.Empty implements GCMonitor { + + private static final Logger LOG = LoggerFactory.getLogger(GCJournal.class); + + public static final String GC_JOURNAL = "gc.log"; + + @Nonnull + private final File directory; + + @Nonnull + private final FileStore store; + + private GCJournalEntry latest; + + public GCJournal(@Nonnull File directory, @Nonnull FileStore store) { + this.directory = checkNotNull(directory); + this.store = checkNotNull(store); + } + + protected synchronized void persist(String head, long size) { + latest = new GCJournalEntry(head, size, System.currentTimeMillis()); + Path path = new File(directory, GC_JOURNAL).toPath(); + try { + try (BufferedWriter w = newBufferedWriter(path, UTF_8, WRITE, + APPEND, CREATE, DSYNC)) { + w.write(latest.toString()); + w.newLine(); + } + } catch (IOException e) { + LOG.error("Error writing gc journal", e); + } + } + + public synchronized GCJournalEntry read() { + if (latest == null) { + List<String> all = readLines(); + if (all.isEmpty()) { + latest = GCJournalEntry.EMPTY; + } else { + String info = all.get(all.size() - 1); + latest = GCJournalEntry.fromString(info); + } + } + return latest; + } + + public synchronized Collection<GCJournalEntry> readAll() { + List<GCJournalEntry> all = new ArrayList<GCJournalEntry>(); + for (String l : readLines()) { + all.add(GCJournalEntry.fromString(l)); + } + return all; + } + + private List<String> readLines() { + File file = new File(directory, GC_JOURNAL); + if (file.exists()) { + try { + return readAllLines(file.toPath(), UTF_8); + } catch (IOException e) { + LOG.error("Error reading gc journal", e); + } + } + return new ArrayList<String>(); + } + + static class GCJournalEntry { + + static final GCJournalEntry EMPTY = new GCJournalEntry("", -1, -1); + + private final String id; + private final long size; + private final long ts; + + public GCJournalEntry(String id, long size, long ts) { + this.id = id; + this.ts = ts; + this.size = size; + } + + @Override + public String toString() { + return id + "," + size + "," + ts; 
+ } + + static GCJournalEntry fromString(String in) { + String[] items = in.split(","); + if (items.length == 3) { + String id = items[0]; + long size = safeParse(items[1]); + long ts = safeParse(items[2]); + return new GCJournalEntry(id, size, ts); + } + return GCJournalEntry.EMPTY; + } + + private static long safeParse(String in) { + try { + return Long.parseLong(in); + } catch (NumberFormatException ex) { + LOG.warn("Unable to parse {} as long value.", in, ex); + } + return -1; + } + + public String getId() { + return id; + } + + public long getSize() { + return size; + } + + public long getTs() { + return ts; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((id == null) ? 0 : id.hashCode()); + result = prime * result + (int) (size ^ (size >>> 32)); + result = prime * result + (int) (ts ^ (ts >>> 32)); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + GCJournalEntry other = (GCJournalEntry) obj; + if (id == null) { + if (other.id != null) + return false; + } else if (!id.equals(other.id)) + return false; + if (size != other.size) + return false; + if (ts != other.ts) + return false; + return true; + } + } + + @Override + public void cleaned(long reclaimedSize, long currentSize) { + String head = store.getHead().getRecordId().toString(); + persist(head, currentSize); + } +} Index: oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/SizeDeltaGcEstimation.java =================================================================== --- oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/SizeDeltaGcEstimation.java (revision 0) +++ oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/SizeDeltaGcEstimation.java (revision 0) @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or 
more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.jackrabbit.oak.segment.file; + +import static java.lang.String.format; +import static org.apache.jackrabbit.oak.commons.IOUtils.humanReadableByteCount; + +import org.apache.jackrabbit.oak.segment.compaction.SegmentGCOptions; + +public class SizeDeltaGcEstimation implements GCEstimation { + + private final long delta; + + private final long totalSize; + + private final GCJournal gcJournal; + + private boolean gcNeeded; + + private String gcInfo = "unknown"; + + private boolean finished = false; + + public SizeDeltaGcEstimation(SegmentGCOptions opts, GCJournal gcJournal, + long totalSize) { + this.delta = opts.getGcSizeDeltaEstimation(); + this.gcJournal = gcJournal; + this.totalSize = totalSize; + } + + @Override + public boolean gcNeeded() { + if (!finished) { + run(); + } + return gcNeeded; + } + + @Override + public String gcLog() { + if (!finished) { + run(); + } + return gcInfo; + } + + private void run() { + if (finished) { + return; + } + if (delta == 0) { + gcNeeded = true; + // constant message: nothing to format when the delta is 0 + gcInfo = + "Estimation skipped because the size delta value equals 0"; + } else if (getPreviousCleanupSize() < 0) { + gcNeeded = true; + gcInfo = "Estimation skipped because of missing gc journal data"; + } 
else { + long lastGc = getPreviousCleanupSize(); + long gain = totalSize - lastGc; + long gainP = totalSize == 0 ? 0 : 100 * gain / totalSize; + + gcNeeded = gain > delta; + if (gcNeeded) { + gcInfo = format( + "Size Delta is %s%% or %s/%s (%s/%s bytes), so running compaction", + gainP, humanReadableByteCount(lastGc), + humanReadableByteCount(totalSize), lastGc, totalSize); + } else { + gcInfo = format( + "Size Delta is %s%% or %s/%s (%s/%s bytes), so skipping compaction for now", + gainP, humanReadableByteCount(lastGc), + humanReadableByteCount(totalSize), lastGc, totalSize); + } + } + finished = true; + } + + private long getPreviousCleanupSize() { + if (gcJournal != null) { + return gcJournal.read().getSize(); + } else { + return -1; + } + } +} Index: oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/file/CompactionEstimatorTest.java =================================================================== --- oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/file/CompactionEstimatorTest.java (revision 1755586) +++ oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/file/CompactionEstimatorTest.java (working copy) @@ -80,9 +80,14 @@ fileStore.flush(); try { - // should be at 66% - assertTrue(fileStore.estimateCompactionGain(Suppliers.ofInstance(false)) - .estimateCompactionGain() > 60); + GCEstimation est = fileStore.estimateCompactionGain(Suppliers + .ofInstance(false)); + assertTrue(est.gcNeeded()); + if (est instanceof CompactionGainEstimate) { + // should be at 66% + assertTrue(((CompactionGainEstimate) est) + .estimateCompactionGain() > 60); + } } finally { fileStore.close(); } Index: oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/file/GcJournalTest.java =================================================================== --- oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/file/GcJournalTest.java (revision 0) +++ 
oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/file/GcJournalTest.java (revision 0) @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.jackrabbit.oak.segment.file; + +import static org.junit.Assert.assertEquals; +import static org.mockito.Mockito.mock; + +import java.io.File; +import java.util.Collection; + +import org.apache.jackrabbit.oak.segment.file.GCJournal.GCJournalEntry; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; + +public class GcJournalTest { + + @Rule + public final TemporaryFolder segmentFolder = new TemporaryFolder(new File( + "target")); + + @Test + public void tarGcJournal() throws Exception { + File directory = segmentFolder.newFolder(); + + GCJournal gc = new GCJournal(directory, mock(FileStore.class)); + + gc.persist("one", 100); + GCJournalEntry e0 = gc.read(); + assertEquals("one", e0.getId()); + assertEquals(100, e0.getSize()); + + gc.persist("two", 250); + GCJournalEntry e1 = gc.read(); + assertEquals("two", e1.getId()); + assertEquals(250, e1.getSize()); + + Collection<GCJournalEntry> all = gc.readAll(); + assertEquals(2, all.size()); + } +}