diff --git a/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/AzureArchiveManager.java b/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/AzureArchiveManager.java index 97186410ee..8452d5f20e 100644 --- a/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/AzureArchiveManager.java +++ b/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/AzureArchiveManager.java @@ -27,6 +27,7 @@ import org.apache.jackrabbit.oak.segment.spi.monitor.FileStoreMonitor; import org.apache.jackrabbit.oak.segment.spi.monitor.IOMonitor; import org.apache.jackrabbit.oak.segment.spi.persistence.SegmentArchiveReader; import org.apache.jackrabbit.oak.segment.spi.persistence.SegmentArchiveWriter; +import org.jetbrains.annotations.NotNull; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -41,6 +42,7 @@ import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.UUID; +import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.stream.Collectors; @@ -225,6 +227,29 @@ public class AzureArchiveManager implements SegmentArchiveManager { } } + private void delete(String archiveName, Set recoveredEntries) throws IOException { + getBlobs(archiveName) + .forEach(cloudBlob -> { + if (!recoveredEntries.contains(AzureUtilities.getSegmentUUID(getName(cloudBlob)))) { + try { + cloudBlob.delete(); + } catch (StorageException e) { + log.error("Can't delete segment {}", cloudBlob.getUri().getPath(), e); + } + } + }); + } + + /** + * This method does not delete segments from the directory given by {@code archiveName} if they are in the set of recovered segments. + * The reason is that, during execution of this method, the remote repository can be accessed by another application, and deleting a valid segment can + * cause consistency issues there. 
+ */ + @Override + public void backup(@NotNull String archiveName, @NotNull String backupArchiveName, @NotNull Set recoveredEntries) throws IOException { + copyFile(archiveName, backupArchiveName); + delete(archiveName, recoveredEntries); + } protected CloudBlobDirectory getDirectory(String archiveName) throws IOException { try { diff --git a/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/AzureUtilities.java b/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/AzureUtilities.java index 46a3e9fa42..47363c02b4 100644 --- a/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/AzureUtilities.java +++ b/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/AzureUtilities.java @@ -26,6 +26,8 @@ import java.util.ArrayList; import java.util.EnumSet; import java.util.List; import java.util.UUID; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import com.microsoft.azure.storage.CloudStorageAccount; import com.microsoft.azure.storage.ResultContinuation; @@ -49,6 +51,8 @@ public final class AzureUtilities { public static String SEGMENT_FILE_NAME_PATTERN = "^([0-9a-f]{4})\\.([0-9a-f-]+)$"; + private static Pattern pattern = Pattern.compile(SEGMENT_FILE_NAME_PATTERN); + private static final Logger log = LoggerFactory.getLogger(AzureUtilities.class); private AzureUtilities() { @@ -62,6 +66,14 @@ public final class AzureUtilities { return String.format("%04x.%s", offset, new UUID(msb, lsb).toString()); } + public static UUID getSegmentUUID(@NotNull String segmentFileName) { + Matcher m = pattern.matcher(segmentFileName); + if (!m.matches()) { + return null; + } + return UUID.fromString(m.group(2)); + } + public static String getName(CloudBlob blob) { return Paths.get(blob.getName()).getFileName().toString(); } diff --git a/oak-segment-azure/src/test/java/org/apache/jackrabbit/oak/segment/azure/AzureArchiveManagerTest.java 
b/oak-segment-azure/src/test/java/org/apache/jackrabbit/oak/segment/azure/AzureArchiveManagerTest.java index 332138cde5..2e4f610ccd 100644 --- a/oak-segment-azure/src/test/java/org/apache/jackrabbit/oak/segment/azure/AzureArchiveManagerTest.java +++ b/oak-segment-azure/src/test/java/org/apache/jackrabbit/oak/segment/azure/AzureArchiveManagerTest.java @@ -26,10 +26,10 @@ import org.apache.jackrabbit.oak.segment.SegmentNodeStoreBuilders; import org.apache.jackrabbit.oak.segment.file.FileStore; import org.apache.jackrabbit.oak.segment.file.FileStoreBuilder; import org.apache.jackrabbit.oak.segment.file.InvalidFileStoreVersionException; -import org.apache.jackrabbit.oak.segment.spi.monitor.RemoteStoreMonitorAdapter; -import org.apache.jackrabbit.oak.segment.spi.persistence.SegmentArchiveManager; import org.apache.jackrabbit.oak.segment.spi.monitor.FileStoreMonitorAdapter; import org.apache.jackrabbit.oak.segment.spi.monitor.IOMonitorAdapter; +import org.apache.jackrabbit.oak.segment.spi.monitor.RemoteStoreMonitorAdapter; +import org.apache.jackrabbit.oak.segment.spi.persistence.SegmentArchiveManager; import org.apache.jackrabbit.oak.segment.spi.persistence.SegmentArchiveWriter; import org.apache.jackrabbit.oak.spi.commit.CommitInfo; import org.apache.jackrabbit.oak.spi.commit.EmptyHook; @@ -50,6 +50,9 @@ import java.util.UUID; import static com.google.common.collect.Lists.newArrayList; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; public class AzureArchiveManagerTest { @@ -85,6 +88,37 @@ public class AzureArchiveManagerTest { assertEquals(uuids.subList(0, 5), newArrayList(recovered.keySet())); } + @Test + public void testBackupWithRecoveredEntries() throws StorageException, URISyntaxException, IOException { + SegmentArchiveManager manager = new AzurePersistence(container.getDirectoryReference("oak")).createArchiveManager(false, false, new 
IOMonitorAdapter(), new FileStoreMonitorAdapter(), new RemoteStoreMonitorAdapter()); + SegmentArchiveWriter writer = manager.create("data00000a.tar"); + + List uuids = new ArrayList<>(); + for (int i = 0; i < 10; i++) { + UUID u = UUID.randomUUID(); + writer.writeSegment(u.getMostSignificantBits(), u.getLeastSignificantBits(), new byte[10], 0, 10, 0, 0, false); + uuids.add(u); + } + + writer.flush(); + writer.close(); + + container.getBlockBlobReference("oak/data00000a.tar/0005." + uuids.get(5).toString()).delete(); + + LinkedHashMap recovered = new LinkedHashMap<>(); + manager.recoverEntries("data00000a.tar", recovered); + + manager.backup("data00000a.tar", "data00000a.tar.bak", recovered.keySet()); + + for (int i = 0; i <= 4; i++) { + assertTrue(container.getBlockBlobReference("oak/data00000a.tar/000"+ i + "." + uuids.get(i)).exists()); + } + + for (int i = 5; i <= 9; i++) { + assertFalse(String.format("Segment %s.??? should have been deleted.", "oak/data00000a.tar/000"+ i), container.getBlockBlobReference("oak/data00000a.tar/000"+ i + "." 
+ uuids.get(i)).exists()); + } + } + @Test public void testUncleanStop() throws URISyntaxException, IOException, InvalidFileStoreVersionException, CommitFailedException, StorageException { AzurePersistence p = new AzurePersistence(container.getDirectoryReference("oak")); @@ -135,6 +169,64 @@ public class AzureArchiveManagerTest { fs.close(); } + @Test + public void testUncleanStopSegmentMissing() throws URISyntaxException, IOException, InvalidFileStoreVersionException, CommitFailedException, StorageException { + AzurePersistence p = new AzurePersistence(container.getDirectoryReference("oak")); + FileStore fs = FileStoreBuilder.fileStoreBuilder(new File("target")).withCustomPersistence(p).build(); + SegmentNodeStore segmentNodeStore = SegmentNodeStoreBuilders.builder(fs).build(); + NodeBuilder builder = segmentNodeStore.getRoot().builder(); + builder.setProperty("foo", "bar"); + segmentNodeStore.merge(builder, EmptyHook.INSTANCE, CommitInfo.EMPTY); + fs.close(); + + // make sure there are 2 archives + fs = FileStoreBuilder.fileStoreBuilder(new File("target")).withCustomPersistence(p).build(); + segmentNodeStore = SegmentNodeStoreBuilders.builder(fs).build(); + builder = segmentNodeStore.getRoot().builder(); + builder.setProperty("foo0", "bar0"); + segmentNodeStore.merge(builder, EmptyHook.INSTANCE, CommitInfo.EMPTY); + fs.flush(); + //create segment 0001 + builder.setProperty("foo1", "bar1"); + segmentNodeStore.merge(builder, EmptyHook.INSTANCE, CommitInfo.EMPTY); + fs.flush(); + //create segment 0002 + builder.setProperty("foo2", "bar2"); + segmentNodeStore.merge(builder, EmptyHook.INSTANCE, CommitInfo.EMPTY); + fs.flush(); + //create segment 0003 + builder.setProperty("foo3", "bar3"); + segmentNodeStore.merge(builder, EmptyHook.INSTANCE, CommitInfo.EMPTY); + fs.flush(); + fs.close(); + + // remove the segment 0002 from the second archive + ListBlobItem segment0002 = container.listBlobs("oak/data00001a.tar/0002.").iterator().next(); + ((CloudBlob) 
segment0002).delete(); + container.getBlockBlobReference("oak/data00001a.tar/closed").delete(); + + fs = FileStoreBuilder.fileStoreBuilder(new File("target")).withCustomPersistence(p).build(); + segmentNodeStore = SegmentNodeStoreBuilders.builder(fs).build(); + assertEquals("bar", segmentNodeStore.getRoot().getString("foo")); + + //recovered archive data00001a.tar should not contain segments 0002 and 0003 + assertFalse(container.listBlobs("oak/data00001a.tar/0002.").iterator().hasNext()); + assertFalse(container.listBlobs("oak/data00001a.tar/0003.").iterator().hasNext()); + + assertTrue("Backup directory should have been created", container.listBlobs("oak/data00001a.tar.bak").iterator().hasNext()); + //backup has all segments but 0002 since it was deleted before recovery + assertTrue(container.listBlobs("oak/data00001a.tar.bak/0001.").iterator().hasNext()); + assertFalse(container.listBlobs("oak/data00001a.tar.bak/0002.").iterator().hasNext()); + assertTrue(container.listBlobs("oak/data00001a.tar.bak/0003.").iterator().hasNext()); + + //verify content from recovered segments preserved + assertEquals("bar1", segmentNodeStore.getRoot().getString("foo1")); + //content from deleted segments not preserved + assertNull(segmentNodeStore.getRoot().getString("foo2")); + assertNull(segmentNodeStore.getRoot().getString("foo3")); + fs.close(); + } + @Test public void testExists() throws IOException, URISyntaxException { SegmentArchiveManager manager = new AzurePersistence(container.getDirectoryReference("oak")).createArchiveManager(false, false, new IOMonitorAdapter(), new FileStoreMonitorAdapter(), new RemoteStoreMonitorAdapter()); diff --git a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/SegmentTarManager.java b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/SegmentTarManager.java index 39c288ad12..c299dcf03a 100644 --- a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/SegmentTarManager.java +++ 
b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/SegmentTarManager.java @@ -29,6 +29,7 @@ import java.nio.file.Files; import java.util.Arrays; import java.util.LinkedHashMap; import java.util.List; +import java.util.Set; import java.util.UUID; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -42,6 +43,7 @@ import org.apache.jackrabbit.oak.segment.spi.monitor.IOMonitor; import org.apache.jackrabbit.oak.segment.spi.persistence.SegmentArchiveManager; import org.apache.jackrabbit.oak.segment.spi.persistence.SegmentArchiveReader; import org.apache.jackrabbit.oak.segment.spi.persistence.SegmentArchiveWriter; +import org.jetbrains.annotations.NotNull; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -177,6 +179,19 @@ public class SegmentTarManager implements SegmentArchiveManager { } } + @Override + public void backup(@NotNull String archiveName, @NotNull String backupArchiveName, @NotNull Set recoveredEntries) throws IOException { + + if (!renameTo(archiveName, backupArchiveName)) { + log.warn("Renaming failed, so using copy to backup {}", archiveName); + copyFile(archiveName, backupArchiveName); + if (!delete(archiveName)) { + throw new IOException( + "Could not remove broken tar file " + archiveName); + } + } + } + /** * Scans through the tar file, looking for all segment entries. 
* diff --git a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/TarReader.java b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/TarReader.java index 4162a83af6..8b7722cb94 100644 --- a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/TarReader.java +++ b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/TarReader.java @@ -152,7 +152,7 @@ public class TarReader implements Closeable { } if (backup) { - backupSafely(archiveManager, file); + backupSafely(archiveManager, file, entries.keySet()); } } @@ -202,18 +202,13 @@ public class TarReader implements Closeable { * overwritten. * * @param file File to backup. + * @param recoveredEntries Identifiers of the recovered segments, forwarded to the archive manager's backup. */ - private static void backupSafely(SegmentArchiveManager archiveManager, String file) throws IOException { + private static void backupSafely(SegmentArchiveManager archiveManager, String file, Set recoveredEntries) throws IOException { String backup = findAvailGen(file, ".bak", archiveManager); log.info("Backing up {} to {}", file, backup); - if (!archiveManager.renameTo(file, backup)) { - log.warn("Renaming failed, so using copy to backup {}", file); - archiveManager.copyFile(file, backup); - if (!archiveManager.delete(file)) { - throw new IOException( - "Could not remove broken tar file " + file); - } - } + + archiveManager.backup(file, backup, recoveredEntries); } /** diff --git a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/SegmentArchiveManager.java b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/SegmentArchiveManager.java index de09e6af07..83cd384c0f 100644 --- a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/SegmentArchiveManager.java +++ b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/SegmentArchiveManager.java @@ -21,6 +21,7 @@ package org.apache.jackrabbit.oak.segment.spi.persistence; import 
java.io.IOException; import java.util.LinkedHashMap; import java.util.List; +import java.util.Set; import java.util.UUID; import org.jetbrains.annotations.NotNull; import org.jetbrains.annotations.Nullable; @@ -114,4 +115,16 @@ public interface SegmentArchiveManager { */ void recoverEntries(@NotNull String archiveName, @NotNull LinkedHashMap entries) throws IOException; + /** + * Backs up the archive given by {@code archiveName} into {@code backupArchiveName}. + * In addition, the set of UUIDs of recovered segments is provided, which can be inspected during backup. + * This method is invoked during the archive recovery procedure, and a concrete implementation can decide whether the original archive + * should be deleted or modified. + * + * @param archiveName name of the archive to back up + * @param backupArchiveName name of the archive that receives the backup + * @param recoveredEntries UUIDs of the segments recovered from the archive + * @throws IOException if an I/O error occurs during the backup + */ + void backup(@NotNull String archiveName, @NotNull String backupArchiveName, @NotNull Set recoveredEntries) throws IOException; } diff --git a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/split/SplitSegmentArchiveManager.java b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/split/SplitSegmentArchiveManager.java index 5ba74273f5..3a025710e2 100644 --- a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/split/SplitSegmentArchiveManager.java +++ b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/split/SplitSegmentArchiveManager.java @@ -21,6 +21,7 @@ import java.util.ArrayList; import java.util.Collections; import java.util.LinkedHashMap; import java.util.List; +import java.util.Set; import java.util.UUID; import org.apache.jackrabbit.oak.segment.spi.persistence.SegmentArchiveManager; @@ -136,4 +137,13 @@ public class SplitSegmentArchiveManager implements SegmentArchiveManager { } } + @Override + public void backup(@NotNull String archiveName, @NotNull String backupArchiveName, @NotNull Set 
recoveredEntries) throws IOException { + if (roArchiveList.contains(archiveName)) { + // archive is in read only part + return; + } else { + rwArchiveManager.backup(archiveName, backupArchiveName, recoveredEntries); + } + } } diff --git a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/split/package-info.java b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/split/package-info.java new file mode 100644 index 0000000000..2e70f911d6 --- /dev/null +++ b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/spi/persistence/split/package-info.java @@ -0,0 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +@Version("1.0.0") +package org.apache.jackrabbit.oak.segment.spi.persistence.split; + +import org.osgi.annotation.versioning.Version; \ No newline at end of file