Index: oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/AzureBlobMetadata.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/AzureBlobMetadata.java (revision 70a1c21d487d6458f9891a111eb07a51820d090e) +++ oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/AzureBlobMetadata.java (revision 977e2b495bd9874b099c31a9d9b8315faf4bc09d) @@ -16,25 +16,36 @@ */ package org.apache.jackrabbit.oak.segment.azure; +import org.apache.jackrabbit.oak.segment.azure.util.CaseInsensitiveKeysMapAccess; + import java.util.HashMap; import java.util.Map; import java.util.UUID; +/** + * Provides access to the blob metadata. + *

+ * In Azure, blob metadata keys are case-insensitive. A bug in the tool azcopy v10 makes each key start with + * an uppercase letter. To avoid future bugs we should be tolerant in what we read. + *

+ * Azure Blobs metadata can not store multiple entries with the same key where only the case differs. Therefore it is + * safe to use the same concept in java, see {@link CaseInsensitiveKeysMapAccess} + */ public final class AzureBlobMetadata { - private static final String METADATA_TYPE = "type"; + static final String METADATA_TYPE = "type"; - private static final String METADATA_SEGMENT_UUID = "uuid"; + static final String METADATA_SEGMENT_UUID = "uuid"; - private static final String METADATA_SEGMENT_POSITION = "position"; + static final String METADATA_SEGMENT_POSITION = "position"; - private static final String METADATA_SEGMENT_GENERATION = "generation"; + static final String METADATA_SEGMENT_GENERATION = "generation"; - private static final String METADATA_SEGMENT_FULL_GENERATION = "fullGeneration"; + static final String METADATA_SEGMENT_FULL_GENERATION = "fullGeneration"; - private static final String METADATA_SEGMENT_COMPACTED = "compacted"; + static final String METADATA_SEGMENT_COMPACTED = "compacted"; - private static final String TYPE_SEGMENT = "segment"; + static final String TYPE_SEGMENT = "segment"; public static HashMap toSegmentMetadata(AzureSegmentArchiveEntry indexEntry) { HashMap map = new HashMap<>(); @@ -48,18 +59,23 @@ } public static AzureSegmentArchiveEntry toIndexEntry(Map metadata, int length) { - UUID uuid = UUID.fromString(metadata.get(METADATA_SEGMENT_UUID)); + Map caseInsensitiveMetadata = CaseInsensitiveKeysMapAccess.convert(metadata); + + + UUID uuid = UUID.fromString(caseInsensitiveMetadata.get(METADATA_SEGMENT_UUID)); long msb = uuid.getMostSignificantBits(); long lsb = uuid.getLeastSignificantBits(); - int position = Integer.parseInt(metadata.get(METADATA_SEGMENT_POSITION)); - int generation = Integer.parseInt(metadata.get(METADATA_SEGMENT_GENERATION)); - int fullGeneration = Integer.parseInt(metadata.get(METADATA_SEGMENT_FULL_GENERATION)); - boolean compacted = Boolean.parseBoolean(metadata.get(METADATA_SEGMENT_COMPACTED)); + 
int position = Integer.parseInt(caseInsensitiveMetadata.get(METADATA_SEGMENT_POSITION)); + int generation = Integer.parseInt(caseInsensitiveMetadata.get(METADATA_SEGMENT_GENERATION)); + int fullGeneration = Integer.parseInt(caseInsensitiveMetadata.get(METADATA_SEGMENT_FULL_GENERATION)); + boolean compacted = Boolean.parseBoolean(caseInsensitiveMetadata.get(METADATA_SEGMENT_COMPACTED)); return new AzureSegmentArchiveEntry(msb, lsb, position, length, generation, fullGeneration, compacted); } public static boolean isSegment(Map metadata) { - return metadata != null && TYPE_SEGMENT.equals(metadata.get(METADATA_TYPE)); + Map caseInsensitiveMetadata = CaseInsensitiveKeysMapAccess.convert(metadata); + + return metadata != null && TYPE_SEGMENT.equals(caseInsensitiveMetadata.get(METADATA_TYPE)); } } Index: oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/AzureJournalFile.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/AzureJournalFile.java (revision 70a1c21d487d6458f9891a111eb07a51820d090e) +++ oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/AzureJournalFile.java (revision 977e2b495bd9874b099c31a9d9b8315faf4bc09d) @@ -21,6 +21,7 @@ import com.microsoft.azure.storage.blob.CloudBlob; import com.microsoft.azure.storage.blob.CloudBlobDirectory; import com.microsoft.azure.storage.blob.ListBlobItem; +import org.apache.jackrabbit.oak.segment.azure.util.CaseInsensitiveKeysMapAccess; import org.apache.jackrabbit.oak.segment.spi.persistence.JournalFile; import org.apache.jackrabbit.oak.segment.spi.persistence.JournalFileReader; import org.apache.jackrabbit.oak.segment.spi.persistence.JournalFileWriter; @@ -33,6 +34,7 @@ import java.util.Comparator; import java.util.Iterator; import java.util.List; +import java.util.Map; import java.util.regex.Matcher; 
import java.util.regex.Pattern; @@ -125,9 +127,10 @@ if (!metadataFetched) { blob.downloadAttributes(); metadataFetched = true; - if (blob.getMetadata().containsKey("lastEntry")) { + Map metadata = CaseInsensitiveKeysMapAccess.convert(blob.getMetadata()); + if (metadata.containsKey("lastEntry")) { firstLineReturned = true; - return blob.getMetadata().get("lastEntry"); + return metadata.get("lastEntry"); } } reader = new ReverseFileReader(blob); Index: oak-segment-azure/src/test/java/org/apache/jackrabbit/oak/segment/azure/AzureBlobMetadataTest.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- oak-segment-azure/src/test/java/org/apache/jackrabbit/oak/segment/azure/AzureBlobMetadataTest.java (revision 467638857b9a0692d8bfcca4b0fe832a7eafdfd2) +++ oak-segment-azure/src/test/java/org/apache/jackrabbit/oak/segment/azure/AzureBlobMetadataTest.java (revision 467638857b9a0692d8bfcca4b0fe832a7eafdfd2) @@ -0,0 +1,82 @@ +package org.apache.jackrabbit.oak.segment.azure; + +import org.junit.Test; + +import java.util.Collections; +import java.util.HashMap; + +import static org.junit.Assert.*; + +public class AzureBlobMetadataTest { + + @Test + public void toSegmentMetadata() { + AzureSegmentArchiveEntry entry = new AzureSegmentArchiveEntry(-7554506325726244935L, -5874985927363300041L, + 3, 5, 50, 60, true); + HashMap map = AzureBlobMetadata.toSegmentMetadata(entry); + + assertEquals("segment", map.get(AzureBlobMetadata.METADATA_TYPE)); + assertEquals("97290085-b1a5-4fb9-ae77-db6d13177537", map.get(AzureBlobMetadata.METADATA_SEGMENT_UUID)); + assertEquals("3", map.get(AzureBlobMetadata.METADATA_SEGMENT_POSITION)); + assertEquals("50", map.get(AzureBlobMetadata.METADATA_SEGMENT_GENERATION)); + assertEquals("60", map.get(AzureBlobMetadata.METADATA_SEGMENT_FULL_GENERATION)); + assertEquals("true", map.get(AzureBlobMetadata.METADATA_SEGMENT_COMPACTED)); + } + + + @Test 
+ public void toIndexEntry() { + HashMap metadata = new HashMap<>(); + metadata.put(AzureBlobMetadata.METADATA_SEGMENT_UUID, "97290085-b1a5-4fb9-ae77-db6d13177537"); + metadata.put(AzureBlobMetadata.METADATA_SEGMENT_POSITION, "3"); + metadata.put(AzureBlobMetadata.METADATA_SEGMENT_GENERATION, "50"); + metadata.put(AzureBlobMetadata.METADATA_SEGMENT_FULL_GENERATION, "60"); + metadata.put(AzureBlobMetadata.METADATA_SEGMENT_COMPACTED, "true"); + AzureSegmentArchiveEntry azureSegmentArchiveEntry = AzureBlobMetadata.toIndexEntry(metadata, 5); + System.out.println(azureSegmentArchiveEntry); + + + assertEquals(-7554506325726244935L, azureSegmentArchiveEntry.getMsb()); + assertEquals(-5874985927363300041L, azureSegmentArchiveEntry.getLsb()); + assertEquals(3, azureSegmentArchiveEntry.getPosition()); + assertEquals(5, azureSegmentArchiveEntry.getLength()); + assertEquals(50, azureSegmentArchiveEntry.getGeneration()); + assertEquals(60, azureSegmentArchiveEntry.getFullGeneration()); + assertTrue(azureSegmentArchiveEntry.isCompacted()); + } + + + @Test + public void toIndexEntry_caseInsensitive() { + HashMap metadata = new HashMap<>(); + metadata.put(AzureBlobMetadata.METADATA_SEGMENT_UUID.toUpperCase(), "97290085-b1a5-4fb9-ae77-db6d13177537"); + metadata.put(AzureBlobMetadata.METADATA_SEGMENT_POSITION.toUpperCase(), "3"); + metadata.put(AzureBlobMetadata.METADATA_SEGMENT_GENERATION.toUpperCase(), "50"); + metadata.put(AzureBlobMetadata.METADATA_SEGMENT_FULL_GENERATION.toUpperCase(), "60"); + metadata.put(AzureBlobMetadata.METADATA_SEGMENT_COMPACTED.toUpperCase(), "true"); + AzureSegmentArchiveEntry azureSegmentArchiveEntry = AzureBlobMetadata.toIndexEntry(metadata, 5); + + assertEquals(-7554506325726244935L, azureSegmentArchiveEntry.getMsb()); + assertEquals(-5874985927363300041L, azureSegmentArchiveEntry.getLsb()); + assertEquals(3, azureSegmentArchiveEntry.getPosition()); + assertEquals(5, azureSegmentArchiveEntry.getLength()); + assertEquals(50, 
azureSegmentArchiveEntry.getGeneration()); + assertEquals(60, azureSegmentArchiveEntry.getFullGeneration()); + assertTrue(azureSegmentArchiveEntry.isCompacted()); + } + + @Test + public void isSegment() { + assertTrue(AzureBlobMetadata.isSegment(Collections.singletonMap("type", "segment"))); + + assertFalse(AzureBlobMetadata.isSegment(Collections.singletonMap("type", "index"))); + } + + + @Test + public void isSegment_caseInsensitive() { + assertTrue(AzureBlobMetadata.isSegment(Collections.singletonMap("Type", "segment"))); + assertTrue(AzureBlobMetadata.isSegment(Collections.singletonMap("TYPE", "segment"))); + assertTrue(AzureBlobMetadata.isSegment(Collections.singletonMap("tYPE", "segment"))); + } +} Index: oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/util/CaseInsensitiveKeysMapAccess.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/util/CaseInsensitiveKeysMapAccess.java (revision 977e2b495bd9874b099c31a9d9b8315faf4bc09d) +++ oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/util/CaseInsensitiveKeysMapAccess.java (revision 977e2b495bd9874b099c31a9d9b8315faf4bc09d) @@ -0,0 +1,33 @@ +package org.apache.jackrabbit.oak.segment.azure.util; + +import java.util.Collections; +import java.util.Map; +import java.util.TreeMap; + +/** + * Wrapper around the map that allows accessing the map with case-insensitive keys. + * For example, the keys 'hello' and 'Hello' access the same value. + *

+ * If the source map contains keys that differ only in case, only one of those keys and one of their values is kept. + * Which one depends on the iteration order of the source map (unspecified for a HashMap), so the result is not deterministic. + */ +public class CaseInsensitiveKeysMapAccess { + + /** + * Wrapper around the map that allows accessing the map with case-insensitive keys. + *

+ * Return an unmodifiable map to make it clear that changes are not reflected to the original map. + * + * @param map the map to convert + * @return an unmodifiable map with case-insensitive key access + */ + public static Map convert(Map map) { + Map caseInsensitiveMap = new TreeMap<>(String.CASE_INSENSITIVE_ORDER); + if (map != null) { + caseInsensitiveMap.putAll(map); + } + // return an unmodifiable map to make it clear that changes are not reflected in the original map. + return Collections.unmodifiableMap(caseInsensitiveMap); + } + +} Index: oak-segment-azure/src/test/java/org/apache/jackrabbit/oak/segment/azure/util/CaseInsensitiveKeysMapAccessTest.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- oak-segment-azure/src/test/java/org/apache/jackrabbit/oak/segment/azure/util/CaseInsensitiveKeysMapAccessTest.java (revision 977e2b495bd9874b099c31a9d9b8315faf4bc09d) +++ oak-segment-azure/src/test/java/org/apache/jackrabbit/oak/segment/azure/util/CaseInsensitiveKeysMapAccessTest.java (revision 977e2b495bd9874b099c31a9d9b8315faf4bc09d) @@ -0,0 +1,26 @@ +package org.apache.jackrabbit.oak.segment.azure.util; + +import org.junit.Test; + +import java.util.Collections; +import java.util.Map; + +import static org.junit.Assert.assertEquals; + +public class CaseInsensitiveKeysMapAccessTest { + + @Test + public void convert() { + Map map = CaseInsensitiveKeysMapAccess.convert(Collections.singletonMap("hello", "world")); + + assertEquals("world", map.get("hello")); + assertEquals("world", map.get("Hello")); + assertEquals("world", map.get("hELLO")); + } + + @Test(expected = UnsupportedOperationException.class) + public void assertImmutable() { + Map map = CaseInsensitiveKeysMapAccess.convert(Collections.singletonMap("hello", "world")); + map.put("foo", "bar"); + } +}