Index: oak-run/src/main/java/org/apache/jackrabbit/oak/run/SegmentUtils.java
===================================================================
--- oak-run/src/main/java/org/apache/jackrabbit/oak/run/SegmentUtils.java (revision 1746139)
+++ oak-run/src/main/java/org/apache/jackrabbit/oak/run/SegmentUtils.java (working copy)
@@ -169,9 +169,27 @@
     }
 
     static void compact(File directory, boolean force) throws IOException {
+
+        boolean persistCM = Boolean.getBoolean("tar.PersistCompactionMap");
+        CompactionStrategy compactionStrategy = new CompactionStrategy(
+                false, CompactionStrategy.CLONE_BINARIES_DEFAULT,
+                CompactionStrategy.CleanupType.CLEAN_ALL, 0,
+                CompactionStrategy.MEMORY_THRESHOLD_DEFAULT) {
+
+            @Override
+            public boolean compacted(Callable<Boolean> setHead)
+                    throws Exception {
+                // oak-run is doing compaction single-threaded
+                // hence no guarding needed - go straight ahead
+                // and call setHead
+                return setHead.call();
+            }
+        };
+        compactionStrategy.setOfflineCompaction(true);
+        compactionStrategy.setPersistCompactionMap(persistCM);
+
         FileStore store = openFileStore(directory.getAbsolutePath(), force);
         try {
-            boolean persistCM = Boolean.getBoolean("tar.PersistCompactionMap");
             System.out.println("Compacting " + directory);
             System.out.println(" before " + Arrays.toString(directory.list()));
             long sizeBefore = FileUtils.sizeOfDirectory(directory);
@@ -181,22 +199,6 @@
 
             System.out.println(" -> compacting");
 
-            CompactionStrategy compactionStrategy = new CompactionStrategy(
-                    false, CompactionStrategy.CLONE_BINARIES_DEFAULT,
-                    CompactionStrategy.CleanupType.CLEAN_ALL, 0,
-                    CompactionStrategy.MEMORY_THRESHOLD_DEFAULT) {
-
-                @Override
-                public boolean compacted(Callable<Boolean> setHead)
-                        throws Exception {
-                    // oak-run is doing compaction single-threaded
-                    // hence no guarding needed - go straight ahead
-                    // and call setHead
-                    return setHead.call();
-                }
-            };
-            compactionStrategy.setOfflineCompaction(true);
-            compactionStrategy.setPersistCompactionMap(persistCM);
             store.setCompactionStrategy(compactionStrategy);
             store.compact();
         } finally {
@@ -205,6 +207,7 @@
 
         System.out.println(" -> cleaning up");
         store = openFileStore(directory.getAbsolutePath(), false);
+        store.setCompactionStrategy(compactionStrategy);
        try {
            for (File file : store.cleanup()) {
                if (!file.exists() || file.delete()) {
Index: oak-segment/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/FileStore.java
===================================================================
--- oak-segment/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/FileStore.java (revision 1746139)
+++ oak-segment/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/FileStore.java (working copy)
@@ -866,7 +866,8 @@
         LinkedList<File> toRemove = newLinkedList();
         Set<UUID> cleanedIds = newHashSet();
         for (TarReader reader : cleaned.keySet()) {
-            cleaned.put(reader, reader.cleanup(referencedIds, cleanedIds));
+            cleaned.put(reader, reader.cleanup(referencedIds, cleanedIds,
+                    compactionStrategy));
             if (shutdown) {
                 gcMonitor.info("TarMK GC #{}: cleanup interrupted", gcCount);
                 break;
Index: oak-segment/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarReader.java
===================================================================
--- oak-segment/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarReader.java (revision 1746139)
+++ oak-segment/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarReader.java (working copy)
@@ -52,6 +52,7 @@
 
 import org.apache.commons.io.FileUtils;
 import org.apache.jackrabbit.oak.plugins.segment.SegmentGraph.SegmentGraphVisitor;
+import org.apache.jackrabbit.oak.plugins.segment.compaction.CompactionStrategy;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -737,8 +738,10 @@
     /**
      * Garbage collects segments in this file. First it collects the set of
-     * segments that are referenced / reachable, then (if more than 25% is
-     * garbage) creates a new generation of the file.
+     * segments that are referenced / reachable, then it creates a new
+     * generation of the file if more than 25% is garbage. In the case of
+     * offline compaction a new tar file is created whenever more than 1%
+     * of the file is garbage.
      * <p>
      * The old generation files are not removed (they can't easily be removed,
      * for memory mapped files).
      *
@@ -749,7 +752,8 @@
      * @return this (if the file is kept as is), or the new generation file, or
      *         null if the file is fully garbage
      */
-    synchronized TarReader cleanup(Set<UUID> referencedIds, Set<UUID> removed) throws IOException {
+    synchronized TarReader cleanup(Set<UUID> referencedIds, Set<UUID> removed,
+            CompactionStrategy strategy) throws IOException {
         String name = file.getName();
         log.debug("Cleaning up {}", name);
 
@@ -786,11 +790,15 @@
             removed.addAll(cleaned);
             logCleanedSegments(cleaned);
             return null;
-        } else if (size >= access.length() * 3 / 4 && graph != null) {
+        }
+        double threshold = strategy.isOfflineCompaction() ? 99.0 / 100 : 3.0 / 4;
+        if (size >= access.length() * threshold && graph != null) {
             // the space savings are not worth it at less than 25%,
             // unless this tar file lacks a pre-compiled segment graph
             // in which case we'll always generate a new tar file with
             // the graph to speed up future garbage collection runs.
+            // offline compaction ignores that threshold and rewrites the
+            // tar file whenever more than 1% of it is garbage.
             log.debug("Not enough space savings. ({}/{}). Skipping clean up of {}",
                     access.length() - size, access.length(), name);
             return this;
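
Note on the threshold expression in the last TarReader hunk: the literals have to be floating point, because in Java both 99 / 100 and 3 / 4 are integer divisions that evaluate to 0, which would make size >= access.length() * threshold true for every tar file and silently disable cleanup. The sketch below only illustrates that arithmetic and the intended 75% / 99% cut-offs, assuming size is the live data and access.length() the tar file length; the shouldSkipCleanup helper and the sample numbers are made up for illustration and are not part of the patch.

public class CleanupThresholdDemo {

    // Mirrors the decision in TarReader#cleanup: keep the tar file as is
    // when the live size stays at or above the savings threshold.
    // Hypothetical helper, used here only to illustrate the arithmetic.
    static boolean shouldSkipCleanup(long liveSize, long fileLength, boolean offline) {
        double threshold = offline ? 99.0 / 100 : 3.0 / 4; // 0.99 vs. 0.75
        return liveSize >= fileLength * threshold;
    }

    public static void main(String[] args) {
        System.out.println(99 / 100);   // 0    - integer division
        System.out.println(3 / 4);      // 0    - integer division
        System.out.println(99.0 / 100); // 0.99
        System.out.println(3.0 / 4);    // 0.75

        // A tar file that is still 80% live data:
        System.out.println(shouldSkipCleanup(80, 100, false)); // true  - online GC keeps it
        System.out.println(shouldSkipCleanup(80, 100, true));  // false - offline compaction rewrites it
    }
}

Keeping the fraction form (99.0 / 100 rather than a separate "always rewrite" branch) leaves online and offline garbage collection on the same code path; only the cut-off differs.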