From fb515517f758645bc2a270e041dbe7268bacb6ea Mon Sep 17 00:00:00 2001
From: Jukka Zitting
Date: Thu, 21 Aug 2014 02:31:02 -0400
Subject: [PATCH] OAK-2019: Compact only if needed

Use a rough gain estimate based on bulk segments to skip online
compaction if doing so would likely release less than 10% of the
repository size.

The estimate only considers bulk segments for now and is intentionally
a bit fuzzy (uses a Bloom filter for O(1) memory overhead), as more
accurate estimates would be significantly harder to produce.
---
 .../oak/plugins/segment/SegmentBlob.java          |   31 +++++++
 .../segment/file/CompactionGainEstimate.java      |   88 ++++++++++++++++++++
 .../oak/plugins/segment/file/FileStore.java       |   40 ++++++++-
 .../oak/plugins/segment/file/TarEntryVisitor.java |   25 ++++++
 .../oak/plugins/segment/file/TarReader.java       |   30 ++++++-
 .../oak/plugins/segment/file/TarWriter.java       |    9 ++
 6 files changed, 221 insertions(+), 2 deletions(-)
 create mode 100644 oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/CompactionGainEstimate.java
 create mode 100644 oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarEntryVisitor.java

diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentBlob.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentBlob.java
index c248505..c9a4429 100644
--- a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentBlob.java
+++ b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentBlob.java
@@ -17,6 +17,8 @@
 package org.apache.jackrabbit.oak.plugins.segment;
 
 import static com.google.common.base.Charsets.UTF_8;
+import static com.google.common.collect.Sets.newIdentityHashSet;
+import static java.util.Collections.emptySet;
 import static org.apache.jackrabbit.oak.plugins.segment.Segment.MEDIUM_LIMIT;
 import static org.apache.jackrabbit.oak.plugins.segment.Segment.SMALL_LIMIT;
 import static org.apache.jackrabbit.oak.plugins.segment.SegmentWriter.BLOCK_SIZE;
@@ -24,6 +26,7 @@ import static org.apache.jackrabbit.oak.plugins.segment.SegmentWriter.BLOCK_SIZE
 import java.io.BufferedInputStream;
 import java.io.IOException;
 import java.io.InputStream;
+import java.util.Set;
 
 import javax.annotation.CheckForNull;
 import javax.annotation.Nonnull;
@@ -37,6 +40,14 @@ import org.apache.jackrabbit.oak.spi.blob.BlobStore;
  */
 public class SegmentBlob extends Record implements Blob {
 
+    public static Iterable<SegmentId> getBulkSegmentIds(Blob blob) {
+        if (blob instanceof SegmentBlob) {
+            return ((SegmentBlob) blob).getBulkSegmentIds();
+        } else {
+            return emptySet();
+        }
+    }
+
     SegmentBlob(RecordId id) {
         super(id);
     }
@@ -207,4 +218,24 @@ public class SegmentBlob extends Record implements Blob {
         return new String(bytes, UTF_8);
     }
 
+    private Iterable<SegmentId> getBulkSegmentIds() {
+        Segment segment = getSegment();
+        int offset = getOffset();
+        byte head = segment.readByte(offset);
+        if ((head & 0xe0) == 0xc0) {
+            // 110x xxxx: long value
+            long length = (segment.readLong(offset) & 0x1fffffffffffffffL) + MEDIUM_LIMIT;
+            int listSize = (int) ((length + BLOCK_SIZE - 1) / BLOCK_SIZE);
+            ListRecord list = new ListRecord(
+                    segment.readRecordId(offset + 8), listSize);
+            Set<SegmentId> ids = newIdentityHashSet();
+            for (RecordId id : list.getEntries()) {
+                ids.add(id.getSegmentId());
+            }
+            return ids;
+        } else {
+            return emptySet();
+        }
+    }
+
 }
diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/CompactionGainEstimate.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/CompactionGainEstimate.java
new file mode 100644
index 0000000..d4cf76e
--- /dev/null
+++ b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/CompactionGainEstimate.java
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.segment.file;
+
+import static org.apache.jackrabbit.oak.api.Type.BINARIES;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.UUID;
+
+import org.apache.jackrabbit.oak.api.Blob;
+import org.apache.jackrabbit.oak.api.PropertyState;
+import org.apache.jackrabbit.oak.plugins.segment.SegmentBlob;
+import org.apache.jackrabbit.oak.plugins.segment.SegmentId;
+import org.apache.jackrabbit.oak.spi.state.ChildNodeEntry;
+import org.apache.jackrabbit.oak.spi.state.NodeState;
+
+import com.google.common.hash.BloomFilter;
+import com.google.common.hash.Funnel;
+import com.google.common.hash.PrimitiveSink;
+
+class CompactionGainEstimate implements TarEntryVisitor {
+
+    private static final Funnel<UUID> UUID_FUNNEL = new Funnel<UUID>() {
+        @Override
+        public void funnel(UUID from, PrimitiveSink into) {
+            into.putLong(from.getMostSignificantBits());
+            into.putLong(from.getLeastSignificantBits());
+        }
+    };
+
+    private final BloomFilter<UUID> uuids;
+
+    private long totalSize = 0;
+
+    private long reachableSize = 0;
+
+    CompactionGainEstimate(NodeState node, int estimatedBulkCount) {
+        uuids = BloomFilter.create(UUID_FUNNEL, estimatedBulkCount);
+        collectBulkSegments(node);
+    }
+
+    private void collectBulkSegments(NodeState node) {
+        for (PropertyState property : node.getProperties()) {
+            for (Blob blob : property.getValue(BINARIES)) {
+                for (SegmentId id : SegmentBlob.getBulkSegmentIds(blob)) {
+                    uuids.put(new UUID(
+                            id.getMostSignificantBits(),
+                            id.getLeastSignificantBits()));
+                }
+            }
+        }
+        for (ChildNodeEntry child : node.getChildNodeEntries()) {
+            collectBulkSegments(child.getNodeState());
+        }
+    }
+
+    public long estimateCompactionGain() {
+        return 100 * (totalSize - reachableSize) / totalSize;
+    }
+
+    //---------------------------------------------------< TarEntryVisitor >--
+
+    @Override
+    public void visit(long msb, long lsb, File file, int offset, int size) {
+        int entrySize = TarReader.getEntrySize(size);
+        totalSize += entrySize;
+        if (SegmentId.isDataSegmentId(lsb)
+                || uuids.mightContain(new UUID(msb, lsb))) {
+            reachableSize += entrySize;
+        }
+    }
+
+}
diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/FileStore.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/FileStore.java
index bbda2b8..305d87a 100644
--- a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/FileStore.java
+++ b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/FileStore.java
@@ -249,7 +249,13 @@ public class FileStore implements SegmentStore {
                 new Runnable() {
                     @Override
                     public void run() {
-                        compact();
+                        long gain = estimateCompactionGain();
+                        if (gain >= 10) {
+                            log.info("Estimated compaction gain is {}%, so running compaction", gain);
+                            compact();
+                        } else {
+                            log.info("Estimated compaction gain is {}%, so skipping compaction for now", gain);
+                        }
                     }
                 });
 
@@ -336,6 +342,38 @@ public class FileStore implements SegmentStore {
         return size;
     }
 
+    /**
+     * Returns the number of segments in this TarMK instance.
+     *
+     * @return number of segments
+     */
+    private synchronized int count() {
+        int count = writer.count();
+        for (TarReader reader : readers) {
+            count += reader.count();
+        }
+        return count;
+    }
+
+    /**
+     * Returns a percentage estimate (scale 0-100) for how much disk space
+     * running compaction (and cleanup) could potentially release.
+     *
+     * @return percentage of disk space that could be freed with compaction
+     */
+    public long estimateCompactionGain() {
+        CompactionGainEstimate estimate =
+                new CompactionGainEstimate(getHead(), count());
+
+        synchronized (this) {
+            for (TarReader reader : readers) {
+                reader.accept(estimate);
+            }
+        }
+
+        return estimate.estimateCompactionGain();
+    }
+
     public void flush() throws IOException {
         synchronized (persistedHead) {
             RecordId before = persistedHead.get();
diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarEntryVisitor.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarEntryVisitor.java
new file mode 100644
index 0000000..b146e6d
--- /dev/null
+++ b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarEntryVisitor.java
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.segment.file;
+
+import java.io.File;
+
+interface TarEntryVisitor {
+
+    void visit(long msb, long lsb, File file, int offset, int size);
+
+}
diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarReader.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarReader.java
index 26b52f5..f302ad7 100644
--- a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarReader.java
+++ b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarReader.java
@@ -67,7 +67,7 @@ class TarReader {
     /** The tar file block size. */
     private static final int BLOCK_SIZE = TarWriter.BLOCK_SIZE;
 
-    private static final int getEntrySize(int size) {
+    static int getEntrySize(int size) {
         return BLOCK_SIZE + size + TarWriter.getPaddingSize(size);
     }
 
@@ -485,6 +485,34 @@ class TarReader {
         return file.length();
     }
 
+    /**
+     * Returns the number of segments in this tar file.
+     *
+     * @return number of segments
+     */
+    int count() {
+        return index.capacity() / 24;
+    }
+
+    /**
+     * Iterates over all entries in this tar file and calls
+     * {@link TarEntryVisitor#visit(long, long, File, int, int)} on them.
+     *
+     * @param visitor entry visitor
+     */
+    void accept(TarEntryVisitor visitor) {
+        int position = index.position();
+        while (position < index.limit()) {
+            visitor.visit(
+                    index.getLong(position),
+                    index.getLong(position + 8),
+                    file,
+                    index.getInt(position + 16),
+                    index.getInt(position + 20));
+            position += 24;
+        }
+    }
+
     Set<UUID> getUUIDs() {
         Set<UUID> uuids = newHashSetWithExpectedSize(index.remaining() / 24);
         int position = index.position();
diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarWriter.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarWriter.java
index 36222cf..8b6cf35 100644
--- a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarWriter.java
+++ b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarWriter.java
@@ -143,6 +143,15 @@ class TarWriter {
         this.file = file;
     }
 
+    /**
+     * Returns the number of segments written so far to this tar file.
+     *
+     * @return number of segments written so far
+     */
+    synchronized int count() {
+        return index.size();
+    }
+
     synchronized Set<UUID> getUUIDs() {
         return newHashSet(index.keySet());
     }
-- 
1.7.10.msysgit.1