Make blob reference collection available on read-only segment stores.

Summary of the change carried by this patch:
  * AbstractFileStore declares collectBlobReferences(Consumer<String>) as an
    abstract method and carries its Javadoc.
  * ReadOnlyFileStore implements it by delegating to
    TarFiles#collectBlobReferences with a reclaimer built by
    newOldReclaimer(GCType.FULL, <gc generation of the head segment>,
    <retained generations taken from the builder's GC options>).
  * SegmentBlobReferenceRetriever accepts an AbstractFileStore instead of a
    FileStore.
  * ReadOnlyStoreBlobReferencesTest checks the references reported by a
    read-only store before and after a full GC on the read-write store.

NOTE(review): this patch was reconstructed from a copy whose line breaks,
indentation and angle-bracket text had been lost. Type arguments on ADDED
lines were restored as <String> (the test reads references back as String[]).
CONTEXT lines are reproduced as found; in particular
"public Set getReferencedSegmentIds()" most likely carries a type argument in
the target file -- confirm against the base revision before applying.
Blank-line placement and indentation of context lines were inferred from the
hunk line counts -- TODO confirm.

Index: oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/ReadOnlyFileStore.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/ReadOnlyFileStore.java	(revision 7474c8d9c0f9e480a8796b6586f591e3a4430688)
+++ oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/ReadOnlyFileStore.java	(date 1535711557000)
@@ -19,6 +19,7 @@
 package org.apache.jackrabbit.oak.segment.file;
 
 import static org.apache.jackrabbit.oak.segment.DefaultSegmentWriterBuilder.defaultSegmentWriterBuilder;
+import static org.apache.jackrabbit.oak.segment.file.Reclaimers.newOldReclaimer;
 
 import java.io.IOException;
 import java.util.ArrayList;
@@ -28,6 +29,7 @@
 import java.util.UUID;
 import java.util.concurrent.Callable;
 import java.util.concurrent.ExecutionException;
+import java.util.function.Consumer;
 
 import com.google.common.io.Closer;
 import com.google.common.util.concurrent.UncheckedExecutionException;
@@ -35,6 +37,7 @@
 import org.apache.jackrabbit.oak.segment.Segment;
 import org.apache.jackrabbit.oak.segment.SegmentId;
 import org.apache.jackrabbit.oak.segment.SegmentWriter;
+import org.apache.jackrabbit.oak.segment.compaction.SegmentGCOptions;
 import org.apache.jackrabbit.oak.segment.file.tar.TarFiles;
 import org.jetbrains.annotations.NotNull;
 import org.slf4j.Logger;
@@ -55,6 +58,7 @@
 
     @NotNull
     private final SegmentWriter writer;
+    private final int gcRetainedGenerations;
 
     private ReadOnlyRevisions revisions;
 
@@ -75,6 +79,8 @@
                 .build();
 
         writer = defaultSegmentWriterBuilder("read-only").withoutCache().build(this);
+        gcRetainedGenerations = builder.getGcOptions().getRetainedGenerations();
+
         log.info("TarMK ReadOnly opened: {} (mmap={})", directory,
                 memoryMapping);
     }
@@ -165,4 +171,10 @@
     public Set getReferencedSegmentIds() {
         return tracker.getReferencedSegmentIds();
     }
+
+    @Override
+    public void collectBlobReferences(Consumer<String> collector) throws IOException {
+        tarFiles.collectBlobReferences(collector,
+                newOldReclaimer(SegmentGCOptions.GCType.FULL, revisions.getHead().getSegmentId().getGcGeneration(), gcRetainedGenerations));
+    }
 }
Index: oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/SegmentBlobReferenceRetriever.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/SegmentBlobReferenceRetriever.java	(revision 7474c8d9c0f9e480a8796b6586f591e3a4430688)
+++ oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/SegmentBlobReferenceRetriever.java	(date 1535954899000)
@@ -23,7 +23,7 @@
 
 import org.apache.jackrabbit.oak.plugins.blob.BlobReferenceRetriever;
 import org.apache.jackrabbit.oak.plugins.blob.ReferenceCollector;
-import org.apache.jackrabbit.oak.segment.file.FileStore;
+import org.apache.jackrabbit.oak.segment.file.AbstractFileStore;
 
 /**
  * Implementation of {@link BlobReferenceRetriever} to retrieve blob references from the
@@ -31,9 +31,9 @@
  */
 public class SegmentBlobReferenceRetriever implements BlobReferenceRetriever {
 
-    private final FileStore store;
+    private final AbstractFileStore store;
 
-    public SegmentBlobReferenceRetriever(FileStore store) {
+    public SegmentBlobReferenceRetriever(AbstractFileStore store) {
         this.store = store;
     }
 
Index: oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/AbstractFileStore.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/AbstractFileStore.java	(revision 7474c8d9c0f9e480a8796b6586f591e3a4430688)
+++ oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/AbstractFileStore.java	(date 1535711557000)
@@ -28,6 +28,7 @@
 import java.util.HashSet;
 import java.util.Set;
 import java.util.UUID;
+import java.util.function.Consumer;
 
 import org.apache.jackrabbit.oak.api.jmx.CacheStatsMBean;
 import org.apache.jackrabbit.oak.segment.CachingSegmentReader;
@@ -270,4 +271,18 @@
         return new Segment(tracker, segmentReader, id, buffer);
     }
 
+    /**
+     * Finds all external blob references that are currently accessible
+     * in this repository and adds them to the given collector. Useful
+     * for collecting garbage in an external data store.
+     * <p>
+     * Note that this method only collects blob references that are already
+     * stored in the repository (at the time when this method is called), so
+     * the garbage collector will need some other mechanism for tracking
+     * in-memory references and references stored while this method is
+     * running.
+     * @param collector  reference collector called back for each blob reference found
+     * @throws IOException if an I/O error occurs while collecting the references
+     */
+    public abstract void collectBlobReferences(Consumer<String> collector) throws IOException;
 }
Index: oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/ReadOnlyStoreBlobReferencesTest.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/ReadOnlyStoreBlobReferencesTest.java	(date 1535954735000)
+++ oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/ReadOnlyStoreBlobReferencesTest.java	(date 1535954735000)
@@ -0,0 +1,147 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.jackrabbit.oak.segment;
+
+import java.io.ByteArrayInputStream;
+import java.io.File;
+import java.io.IOException;
+import java.util.Random;
+import java.util.Set;
+
+import com.google.common.base.Strings;
+import org.apache.jackrabbit.oak.api.Blob;
+import org.apache.jackrabbit.oak.api.CommitFailedException;
+import org.apache.jackrabbit.oak.plugins.blob.datastore.DataStoreBlobStore;
+import org.apache.jackrabbit.oak.plugins.blob.datastore.OakFileDataStore;
+import org.apache.jackrabbit.oak.segment.file.FileStore;
+import org.apache.jackrabbit.oak.segment.file.InvalidFileStoreVersionException;
+import org.apache.jackrabbit.oak.segment.file.ReadOnlyFileStore;
+import org.apache.jackrabbit.oak.spi.blob.BlobStore;
+import org.apache.jackrabbit.oak.spi.commit.CommitInfo;
+import org.apache.jackrabbit.oak.spi.commit.EmptyHook;
+import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
+import org.apache.jackrabbit.oak.spi.state.NodeStore;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+
+import static com.google.common.collect.Sets.newHashSet;
+import static org.apache.jackrabbit.oak.segment.compaction.SegmentGCOptions.defaultGCOptions;
+import static org.apache.jackrabbit.oak.segment.file.FileStoreBuilder.fileStoreBuilder;
+import static org.junit.Assert.assertEquals;
+
+/**
+ * Tests for {@link ReadOnlyFileStore#collectBlobReferences}.
+ */
+public class ReadOnlyStoreBlobReferencesTest {
+
+    @Rule
+    public TemporaryFolder folder = new TemporaryFolder(new File("target"));
+
+
+    @Test
+    public void collectReferences()
+            throws IOException, InvalidFileStoreVersionException, CommitFailedException {
+        File fileStoreDir = new File(getFileStoreFolder(), "segmentstore");
+        File dataStoreDir = new File(getFileStoreFolder(), "blobstore");
+        String blobId = createLoad(fileStoreDir, dataStoreDir).getContentIdentity();
+
+        assertReferences(fileStoreDir, dataStoreDir, 1, blobId);
+    }
+
+    @Test
+    public void collectReferencesAfterGC()
+            throws IOException, InvalidFileStoreVersionException, CommitFailedException {
+        File fileStoreDir = new File(getFileStoreFolder(), "segmentstore");
+        File dataStoreDir = new File(getFileStoreFolder(), "blobstore");
+        String blobId = createLoad(fileStoreDir, dataStoreDir).getContentIdentity();
+
+
+        try (FileStore fileStore = fileStoreBuilder(fileStoreDir).withBlobStore(newBlobStore(dataStoreDir))
+                .withGCOptions(defaultGCOptions().setGcSizeDeltaEstimation(1).setRetainedGenerations(1)).build()) {
+
+            SegmentNodeStore nodeStore = SegmentNodeStoreBuilders.builder(fileStore).build();
+            NodeBuilder builder = nodeStore.getRoot().builder();
+            builder.removeProperty("bin");
+            nodeStore.merge(builder, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+            fileStore.flush();
+
+            Set<String> actualReferences = newHashSet();
+            fileStore.collectBlobReferences(actualReferences::add);
+            assertEquals("Binary should be visible before gc cycle", 1, actualReferences.size());
+            assertEquals("Binary reference returned should be same", blobId,
+                    actualReferences.toArray(new String[0])[0]);
+
+            actualReferences = newHashSet();
+            fileStore.fullGC();
+            fileStore.collectBlobReferences(actualReferences::add);
+            assertEquals("Binary should be deleted after gc cycle", 0, actualReferences.size());
+        }
+
+        assertReferences(fileStoreDir, dataStoreDir, 0, null);
+    }
+
+    private File getFileStoreFolder() {
+        return folder.getRoot();
+    }
+
+    private static Blob createBlob(NodeStore nodeStore, int size) throws IOException {
+        byte[] data = new byte[size];
+        new Random().nextBytes(data);
+        return nodeStore.createBlob(new ByteArrayInputStream(data));
+    }
+
+    private static BlobStore newBlobStore(File directory) {
+        OakFileDataStore delegate = new OakFileDataStore();
+        delegate.setPath(directory.getAbsolutePath());
+        delegate.init(null);
+        return new DataStoreBlobStore(delegate);
+    }
+
+    private Blob createLoad(File fileStoreDir, File dataStoreDir)
+            throws IOException, CommitFailedException, InvalidFileStoreVersionException {
+        try (FileStore fileStore = fileStoreBuilder(fileStoreDir).withBlobStore(newBlobStore(dataStoreDir))
+                .withGCOptions(defaultGCOptions().setGcSizeDeltaEstimation(0)).build()) {
+            SegmentNodeStore nodeStore = SegmentNodeStoreBuilders.builder(fileStore).build();
+
+            NodeBuilder builder = nodeStore.getRoot().builder();
+            Blob blob = createBlob(nodeStore, 18000);
+            builder.setProperty("bin", blob);
+            nodeStore.merge(builder, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+            fileStore.flush();
+            return blob;
+        }
+    }
+
+    private void assertReferences(File fileStoreDir, File dataStoreDir, int count, String blobId)
+            throws IOException, InvalidFileStoreVersionException {
+        try (ReadOnlyFileStore fileStore = fileStoreBuilder(fileStoreDir).withBlobStore(newBlobStore(dataStoreDir))
+                .buildReadOnly()) {
+
+            Set<String> actualReferences = newHashSet();
+            fileStore.collectBlobReferences(actualReferences::add);
+            assertEquals("Read only store visible references different", count, actualReferences.size());
+            if (!Strings.isNullOrEmpty(blobId)) {
+                assertEquals("Binary reference returned should be same", blobId,
+                        actualReferences.toArray(new String[0])[0]);
+            }
+        }
+    }
+}