diff --git a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/FileStore.java b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/FileStore.java index f8792f5f58..599ef9ed96 100644 --- a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/FileStore.java +++ b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/FileStore.java @@ -728,6 +728,101 @@ private CompactionResult compact( @Nonnull NodeState base, @Nonnull GCGeneration newGeneration) { try { + { + // This is a slightly modified inline of `cleanup()`, invoked with a + // successful compaction result. There are some important differences deriving + // from the fact that we are assuming that the compaction for `newGeneration` + // is going to succeed. + + // First, we don't have a RecordId for the compacted state, because it didn't + // happen yet. This shouldn't matter, because we are not going to advance the + // `gcJournal` to the `RecordId` of the compacted state. + + // Second, we are using a custom reclaimer that is similar to the one returned + // by `newOldReclaimer`, but that also takes in consideration that + // `newGeneration` has not been committed yet, and the most recent transient + // state shouldn't be removed. + + // Third, we don't clear the segment cache. There might be transient segments + // in there, and we don't want those segments to be removed. + + // Fourth, the following code assumes the number of retained generations fixed + // to two, which is also the default value for the Segment Store. A complete + // solution should be flexible enough to accommodate other values for the + // number of retained generations. 
+ + PrintableStopwatch watch = PrintableStopwatch.createStarted(); + + Predicate<GCGeneration> reclaimer; + + GCGeneration currentGeneration = getGcGeneration(); + + switch (gcType) { + case FULL: + reclaimer = generation -> { + if (generation == null) { + return false; + } + if (generation.getFullGeneration() < currentGeneration.getFullGeneration()) { + return true; + } + if (generation.getFullGeneration() > currentGeneration.getFullGeneration()) { + return true; + } + assert generation.getFullGeneration() == currentGeneration.getFullGeneration(); + return generation.getGeneration() < currentGeneration.getGeneration() && !generation.isCompacted(); + }; + break; + case TAIL: + reclaimer = generation -> { + if (generation == null) { + return false; + } + if (generation.getFullGeneration() < currentGeneration.getFullGeneration() - 1) { + return true; + } + if (generation.getFullGeneration() == currentGeneration.getFullGeneration() - 1) { + return !generation.isCompacted(); + } + if (generation.getFullGeneration() > currentGeneration.getFullGeneration()) { + return true; + } + assert generation.getFullGeneration() == currentGeneration.getFullGeneration(); + return generation.getGeneration() < currentGeneration.getGeneration() && !generation.isCompacted(); + }; + break; + default: + throw new IllegalArgumentException("invalid garbage collection type"); + + } + + gcListener.info("pre-compaction cleanup started"); + gcListener.updateStatus(CLEANUP.message()); + + // Suggest to the JVM that now would be a good time + // to clear stale weak references in the SegmentTracker + + System.gc(); + + TarFiles.CleanupResult cleanupResult = tarFiles.cleanup(newCleanupContext(reclaimer)); + if (cleanupResult.isInterrupted()) { + gcListener.info("cleanup interrupted"); + } + tracker.clearSegmentIdTables(cleanupResult.getReclaimedSegmentIds(), "[pre-compaction cleanup]"); + gcListener.info("cleanup marking files for deletion: {}", toFileNames(cleanupResult.getRemovableFiles())); + + long finalSize 
= size(); + long reclaimedSize = cleanupResult.getReclaimedSize(); + stats.reclaimed(reclaimedSize); + gcListener.cleaned(reclaimedSize, finalSize); + gcListener.info( + "cleanup completed in {}. Post cleanup size is {} and space reclaimed {}.", + watch, + newPrintableBytes(finalSize), + newPrintableBytes(reclaimedSize)); + fileReaper.add(cleanupResult.getRemovableFiles()); + } + PrintableStopwatch watch = PrintableStopwatch.createStarted(); gcListener.info("compaction started, gc options={}, current generation={}, new generation={}", gcOptions, getHead().getRecordId().getSegment().getGcGeneration(), newGeneration); diff --git a/oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/CompactionAndCleanupIT.java b/oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/CompactionAndCleanupIT.java index 93113f800a..ac031a29b0 100644 --- a/oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/CompactionAndCleanupIT.java +++ b/oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/CompactionAndCleanupIT.java @@ -774,68 +774,110 @@ private static void traverse(NodeState node) { * Test asserting OAK-3348: Cross gc sessions might introduce references to pre-compacted segments */ @Test - public void preCompactionReferences() throws Exception { - for (String ref : new String[] {"merge-before-compact", "merge-after-compact"}) { - File repoDir = new File(getFileStoreFolder(), ref); - FileStore fileStore = fileStoreBuilder(repoDir) - .withMaxFileSize(2) - .withGCOptions(defaultGCOptions()) - .build(); - final SegmentNodeStore nodeStore = SegmentNodeStoreBuilders.builder(fileStore).build(); - try { - // add some content - NodeBuilder preGCBuilder = nodeStore.getRoot().builder(); - preGCBuilder.setChildNode("test").setProperty("blob", createBlob(nodeStore, 1024 * 1024)); - nodeStore.merge(preGCBuilder, EmptyHook.INSTANCE, CommitInfo.EMPTY); - - // remove it again so we have something to gc - preGCBuilder = nodeStore.getRoot().builder(); - 
preGCBuilder.getChildNode("test").remove(); - nodeStore.merge(preGCBuilder, EmptyHook.INSTANCE, CommitInfo.EMPTY); - - // with a new builder simulate exceeding the update limit. - // This will cause changes to be pre-written to segments - preGCBuilder = nodeStore.getRoot().builder(); - preGCBuilder.setChildNode("test").setChildNode("a").setChildNode("b").setProperty("foo", "bar"); - for (int k = 0; k < getInteger("update.limit", 10000); k += 2) { - preGCBuilder.setChildNode("dummy").remove(); - } + public void preCompactionReferencesWhenMergingBeforeCompact() throws Exception { + File repoDir = new File(getFileStoreFolder(), "merge-before-compact"); + FileStore fileStore = fileStoreBuilder(repoDir) + .withMaxFileSize(2) + .withGCOptions(defaultGCOptions()) + .build(); + final SegmentNodeStore nodeStore = SegmentNodeStoreBuilders.builder(fileStore).build(); + try { + // add some content + NodeBuilder preGCBuilder = nodeStore.getRoot().builder(); + preGCBuilder.setChildNode("test").setProperty("blob", createBlob(nodeStore, 1024 * 1024)); + nodeStore.merge(preGCBuilder, EmptyHook.INSTANCE, CommitInfo.EMPTY); + + // remove it again so we have something to gc + preGCBuilder = nodeStore.getRoot().builder(); + preGCBuilder.getChildNode("test").remove(); + nodeStore.merge(preGCBuilder, EmptyHook.INSTANCE, CommitInfo.EMPTY); + + // with a new builder simulate exceeding the update limit. 
+ // This will cause changes to be pre-written to segments + preGCBuilder = nodeStore.getRoot().builder(); + preGCBuilder.setChildNode("test").setChildNode("a").setChildNode("b").setProperty("foo", "bar"); + for (int k = 0; k < getInteger("update.limit", 10000); k += 2) { + preGCBuilder.setChildNode("dummy").remove(); + } - // case 1: merge above changes before compact - if ("merge-before-compact".equals(ref)) { - NodeBuilder builder = nodeStore.getRoot().builder(); - builder.setChildNode("n"); - nodeStore.merge(builder, EmptyHook.INSTANCE, CommitInfo.EMPTY); - nodeStore.merge(preGCBuilder, EmptyHook.INSTANCE, CommitInfo.EMPTY); - } + // merge above changes before compact + NodeBuilder builder = nodeStore.getRoot().builder(); + builder.setChildNode("n"); + nodeStore.merge(builder, EmptyHook.INSTANCE, CommitInfo.EMPTY); + nodeStore.merge(preGCBuilder, EmptyHook.INSTANCE, CommitInfo.EMPTY); - // Ensure cleanup is efficient by surpassing the number of - // retained generations - for (int k = 0; k < defaultGCOptions().getRetainedGenerations(); k++) { - fileStore.compactFull(); - } + // Ensure cleanup is efficient by surpassing the number of + // retained generations + for (int k = 0; k < defaultGCOptions().getRetainedGenerations(); k++) { + fileStore.compactFull(); + } + } finally { + fileStore.close(); + } - // case 2: merge above changes after compact - if ("merge-after-compact".equals(ref)) { - NodeBuilder builder = nodeStore.getRoot().builder(); - builder.setChildNode("n"); - nodeStore.merge(builder, EmptyHook.INSTANCE, CommitInfo.EMPTY); - nodeStore.merge(preGCBuilder, EmptyHook.INSTANCE, CommitInfo.EMPTY); - } - } finally { - fileStore.close(); + // Re-initialise the file store to simulate off-line gc + fileStore = fileStoreBuilder(repoDir).withMaxFileSize(2).build(); + try { + // The 1M blob should get gc-ed + fileStore.cleanup(); + assertTrue("merge-before-compact" + " repository size " + fileStore.getStats().getApproximateSize() + " < " + 1024 * 1024, + 
fileStore.getStats().getApproximateSize() < 1024 * 1024); + } finally { + fileStore.close(); + } + } + + @Test + public void preCompactionReferencesWhenMergingAfterCompact() throws Exception { + File repoDir = new File(getFileStoreFolder(), "merge-after-compact"); + FileStore fileStore = fileStoreBuilder(repoDir) + .withMaxFileSize(2) + .withGCOptions(defaultGCOptions()) + .build(); + final SegmentNodeStore nodeStore = SegmentNodeStoreBuilders.builder(fileStore).build(); + try { + // add some content + NodeBuilder preGCBuilder = nodeStore.getRoot().builder(); + preGCBuilder.setChildNode("test").setProperty("blob", createBlob(nodeStore, 1024 * 1024)); + nodeStore.merge(preGCBuilder, EmptyHook.INSTANCE, CommitInfo.EMPTY); + + // remove it again so we have something to gc + preGCBuilder = nodeStore.getRoot().builder(); + preGCBuilder.getChildNode("test").remove(); + nodeStore.merge(preGCBuilder, EmptyHook.INSTANCE, CommitInfo.EMPTY); + + // with a new builder simulate exceeding the update limit. 
+ // This will cause changes to be pre-written to segments + preGCBuilder = nodeStore.getRoot().builder(); + preGCBuilder.setChildNode("test").setChildNode("a").setChildNode("b").setProperty("foo", "bar"); + for (int k = 0; k < getInteger("update.limit", 10000); k += 2) { + preGCBuilder.setChildNode("dummy").remove(); } - // Re-initialise the file store to simulate off-line gc - fileStore = fileStoreBuilder(repoDir).withMaxFileSize(2).build(); - try { - // The 1M blob should get gc-ed - fileStore.cleanup(); - assertTrue(ref + " repository size " + fileStore.getStats().getApproximateSize() + " < " + 1024 * 1024, - fileStore.getStats().getApproximateSize() < 1024 * 1024); - } finally { - fileStore.close(); + // Ensure cleanup is efficient by surpassing the number of + // retained generations + for (int k = 0; k < defaultGCOptions().getRetainedGenerations(); k++) { + fileStore.compactFull(); } + + // merge above changes after compact + NodeBuilder builder = nodeStore.getRoot().builder(); + builder.setChildNode("n"); + nodeStore.merge(builder, EmptyHook.INSTANCE, CommitInfo.EMPTY); + nodeStore.merge(preGCBuilder, EmptyHook.INSTANCE, CommitInfo.EMPTY); + } finally { + fileStore.close(); + } + + // Re-initialise the file store to simulate off-line gc + fileStore = fileStoreBuilder(repoDir).withMaxFileSize(2).build(); + try { + // The 1M blob should get gc-ed + fileStore.cleanup(); + assertTrue("merge-after-compact" + " repository size " + fileStore.getStats().getApproximateSize() + " < " + 1024 * 1024, + fileStore.getStats().getApproximateSize() < 1024 * 1024); + } finally { + fileStore.close(); } }