Index: oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/Compactor.java
===================================================================
--- oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/Compactor.java	(revision 1701966)
+++ oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/Compactor.java	(working copy)
@@ -28,6 +28,8 @@
 import java.util.List;
 import java.util.Map;
 
+import com.google.common.base.Predicate;
+import com.google.common.base.Predicates;
 import com.google.common.hash.Hashing;
 import org.apache.jackrabbit.oak.api.Blob;
 import org.apache.jackrabbit.oak.api.PropertyState;
@@ -65,6 +67,12 @@
 
     private final PartialCompactionMap map;
 
+    /**
+     * Filters nodes that will be included in the compaction map, allowing for
+     * optimization in case of an offline compaction
+     */
+    private Predicate<NodeState> includeInMap = Predicates.alwaysTrue();
+
     private final ProgressTracker progress = new ProgressTracker();
 
     /**
@@ -94,6 +102,9 @@
             this.map = new InMemoryCompactionMap(writer.getTracker());
         }
         this.cloneBinaries = compactionStrategy.cloneBinaries();
+        if (compactionStrategy.isOfflineCompaction()) {
+            includeInMap = new OfflineCompactionPredicate();
+        }
     }
 
     protected SegmentNodeBuilder process(NodeState before, NodeState after, NodeState onto) {
@@ -209,7 +220,7 @@
                 if (success) {
                     SegmentNodeState state = writer.writeNode(child.getNodeState());
                     builder.setChildNode(name, state);
-                    if (id != null) {
+                    if (id != null && includeInMap.apply(state)) {
                         map.put(id, state.getRecordId());
                     }
                 }
@@ -388,4 +399,37 @@
         }
     }
 
+    /**
+     * Filter used for offline compaction: accepts a node when it reports more
+     * than one child node, or when a single property value or the running
+     * total of its property value sizes reaches {@link #OFFLINE_THRESHOLD}.
+     */
+    private static class OfflineCompactionPredicate implements
+            Predicate<NodeState> {
+
+        /**
+         * Size threshold (64K): at or above it, a node will be included in
+         * the compaction map
+         */
+        private static final long OFFLINE_THRESHOLD = 65536;
+
+        @Override
+        public boolean apply(NodeState state) {
+            if (state.getChildNodeCount(2) > 1) {
+                return true;
+            }
+            long count = 0;
+            for (PropertyState ps : state.getProperties()) {
+                for (int i = 0; i < ps.count(); i++) {
+                    long size = ps.size(i);
+                    count += size;
+                    if (size >= OFFLINE_THRESHOLD || count >= OFFLINE_THRESHOLD) {
+                        return true;
+                    }
+                }
+            }
+            return false;
+        }
+    }
+
 }
Index: oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/compaction/CompactionStrategy.java
===================================================================
--- oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/compaction/CompactionStrategy.java	(revision 1701966)
+++ oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/compaction/CompactionStrategy.java	(working copy)
@@ -129,6 +129,12 @@
      */
     private byte gainThreshold = GAIN_THRESHOLD_DEFAULT;
 
+    /**
+     * Flag that allows turning on an optimized version of the compaction
+     * process in the case of offline compaction
+     */
+    private boolean offlineCompaction = false;
+
     protected CompactionStrategy(boolean paused,
             boolean cloneBinaries, @Nonnull CleanupType cleanupType, long olderThan, byte memoryThreshold) {
         checkArgument(olderThan >= 0);
@@ -278,4 +284,19 @@
     public abstract boolean compacted(@Nonnull Callable<Boolean> setHead)
             throws Exception;
 
+    /**
+     * @return {@code true} if the optimized offline compaction mode is on
+     */
+    public boolean isOfflineCompaction() {
+        return offlineCompaction;
+    }
+
+    /**
+     * @param offlineCompaction
+     *            {@code true} to turn on the optimized offline compaction mode
+     */
+    public void setOfflineCompaction(boolean offlineCompaction) {
+        this.offlineCompaction = offlineCompaction;
+    }
+
 }
Index: oak-run/src/main/java/org/apache/jackrabbit/oak/run/Main.java
===================================================================
--- oak-run/src/main/java/org/apache/jackrabbit/oak/run/Main.java	(revision 1701966)
+++ oak-run/src/main/java/org/apache/jackrabbit/oak/run/Main.java	(working copy)
@@ -53,6 +53,7 @@
 import javax.jcr.Repository;
 
 import com.google.common.base.Joiner;
+import com.google.common.base.Stopwatch;
 import com.google.common.collect.Maps;
 import com.google.common.collect.Queues;
 import com.google.common.io.Closer;
@@ -65,6 +66,7 @@
 import joptsimple.OptionSet;
 import joptsimple.OptionSpec;
 
+import org.apache.commons.io.FileUtils;
 import org.apache.commons.lang.time.DateUtils;
 import org.apache.jackrabbit.core.RepositoryContext;
 import org.apache.jackrabbit.core.config.RepositoryConfig;
@@ -72,6 +74,7 @@
 import org.apache.jackrabbit.oak.api.ContentRepository;
 import org.apache.jackrabbit.oak.benchmark.BenchmarkRunner;
 import org.apache.jackrabbit.oak.checkpoint.Checkpoints;
+import org.apache.jackrabbit.oak.commons.IOUtils;
 import org.apache.jackrabbit.oak.commons.PathUtils;
 import org.apache.jackrabbit.oak.commons.json.JsopBuilder;
 import org.apache.jackrabbit.oak.console.Console;
@@ -474,9 +477,16 @@
             System.err.println("Invalid FileStore directory " + args[0]);
             System.exit(1);
         } else {
+            boolean persistCM = Boolean
+                    .getBoolean("tar.PersistCompactionMap");
+            Stopwatch watch = Stopwatch.createStarted();
             File directory = new File(args[0]);
             System.out.println("Compacting " + directory);
             System.out.println(" before " + Arrays.toString(directory.list()));
+            long sizeBefore = FileUtils.sizeOfDirectory(directory);
+            System.out.println(" size "
+                    + IOUtils.humanReadableByteCount(sizeBefore) + " (" + sizeBefore
+                    + " bytes)");
             System.out.println(" -> compacting");
 
             FileStore store = new FileStore(directory, 256, TAR_STORAGE_MEMORY_MAPPED);
@@ -494,6 +504,8 @@
                         return setHead.call();
                     }
                 };
+                compactionStrategy.setPersistCompactionMap(persistCM);
+                compactionStrategy.setOfflineCompaction(true);
                 store.setCompactionStrategy(compactionStrategy);
                 store.compact();
             } finally {
@@ -507,8 +519,15 @@
             } finally {
                 store.close();
             }
-
-            System.out.println(" after " + Arrays.toString(directory.list()));
+            watch.stop();
+            System.out.println(" after "
+                    + Arrays.toString(directory.list()));
+            long sizeAfter = FileUtils.sizeOfDirectory(directory);
+            System.out.println(" size "
+                    + IOUtils.humanReadableByteCount(sizeAfter) + " (" + sizeAfter
+                    + " bytes)");
+            System.out.println(" duration " + watch.toString() + " ("
+                    + watch.elapsed(TimeUnit.SECONDS) + "s).");
         }
     }
 