diff --git oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/Segment.java oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/Segment.java index 5648a96..e17983a 100644 --- oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/Segment.java +++ oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/Segment.java @@ -100,6 +100,15 @@ public class Segment { */ public static final int MEDIUM_LIMIT = (1 << (16 - 2)) + SMALL_LIMIT; + /** + * Maximum size of small blob IDs. A small blob ID is stored in a value + * record whose length field contains the pattern "1110" in its most + * significant bits. Since two bytes are used to store both the bit pattern + * and the actual length of the blob ID, a maximum of 2^12 values can be + * stored in the length field. + */ + public static final int BLOB_ID_SMALL_LIMIT = 1 << 12; + public static final int REF_COUNT_OFFSET = 5; static final int ROOT_COUNT_OFFSET = 6; diff --git oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentBlob.java oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentBlob.java index 24f414b..2d7f240 100644 --- oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentBlob.java +++ oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentBlob.java @@ -79,10 +79,11 @@ public class SegmentBlob extends Record implements Blob { segment.readRecordId(offset + 8), listSize); return new SegmentStream(getRecordId(), list, length); } else if ((head & 0xf0) == 0xe0) { - // 1110 xxxx: external value - String refererence = readReference(segment, offset, head); - return segment.getSegmentId().getTracker().getStore() - .readBlob(refererence).getNewStream(); + // 1110 xxxx: external value, short blob ID + return getNewStream(readShortBlobId(segment, offset, head)); + } else if ((head & 0xf8) == 0xf0) { + // 1111 0xxx: external value, long blob ID + return getNewStream(readLongBlobId(segment, offset)); } else { throw new IllegalStateException(String.format( "Unexpected value record type: %02x", head & 0xff)); @@ -104,15 +105,11 @@ public class SegmentBlob extends Record implements Blob { // 110x xxxx: long value return (segment.readLong(offset) & 0x1fffffffffffffffL) + MEDIUM_LIMIT; } else if ((head & 0xf0) == 0xe0) { - // 1110 xxxx: external value - String reference = readReference(segment, offset, head); - long length = segment.getSegmentId().getTracker().getStore() - .readBlob(reference).length(); - if (length == -1) { - throw new IllegalStateException( - "Unknown length of external binary: " + reference); - } - return length; + // 1110 xxxx: external value, short blob ID + return getLength(readShortBlobId(segment, offset, head)); + } else if ((head & 0xf8) == 0xf0) { + // 1111 0xxx: external value, long blob ID + return getLength(readLongBlobId(segment, offset)); } else { throw new IllegalStateException(String.format( "Unexpected value record type: %02x", head & 0xff)); @@ -150,8 +147,8 @@ public class SegmentBlob extends Record implements Blob { Segment segment = getSegment(); int offset = getOffset(); byte head = segment.readByte(offset); - // 1110 xxxx: external value - return (head & 0xf0) == 0xe0; + // 1110 xxxx or 1111 0xxx: external value + return (head & 0xf0) == 0xe0 || (head & 0xf8) == 0xf0; } public String getBlobId() { @@ -159,8 +156,11 @@ public class SegmentBlob extends Record implements Blob { int offset = getOffset(); byte head = segment.readByte(offset); if ((head & 0xf0) == 0xe0) { - // 1110 xxxx: external value - return readReference(segment, offset, head); + // 1110 xxxx: external value, small blob ID + return readShortBlobId(segment, offset, head); + } else if ((head & 0xf8) == 0xf0) { + // 1111 0xxx: external value, long blob ID + return readLongBlobId(segment, offset); } else { return null; } @@ -191,7 +191,10 @@ public class SegmentBlob extends Record implements Blob { return writer.writeLargeBlob(length, list.getEntries()); } } else if ((head & 0xf0) == 0xe0) { - // 1110 xxxx: external value + // 1110 xxxx: external value, short blob ID + return writer.writeExternalBlob(getBlobId()); + } else if ((head & 0xf8) == 0xf0) { + // 1111 0xxx: external value, long blob ID return writer.writeExternalBlob(getBlobId()); } else { throw new IllegalStateException(String.format( @@ -222,14 +225,18 @@ public class SegmentBlob extends Record implements Blob { //-----------------------------------------------------------< private >-- - private static String readReference( - Segment segment, int offset, byte head) { + private static String readShortBlobId(Segment segment, int offset, byte head) { int length = (head & 0x0f) << 8 | (segment.readByte(offset + 1) & 0xff); byte[] bytes = new byte[length]; segment.readBytes(offset + 2, bytes, 0, length); return new String(bytes, UTF_8); } + private static String readLongBlobId(Segment segment, int offset) { + RecordId blobIdRecordId = segment.readRecordId(offset + 1); + return Segment.readString(blobIdRecordId); + } + private Iterable getBulkSegmentIds() { Segment segment = getSegment(); int offset = getOffset(); @@ -250,4 +257,22 @@ public class SegmentBlob extends Record implements Blob { } } + private Blob getBlob(String blobId) { + return getSegment().getSegmentId().getTracker().getStore().readBlob(blobId); + } + + private InputStream getNewStream(String blobId) { + return getBlob(blobId).getNewStream(); + } + + private long getLength(String blobId) { + long length = getBlob(blobId).length(); + + if (length == -1) { + throw new IllegalStateException(String.format("Unknown length of external binary: %s", blobId)); + } + + return length; + } + } diff --git oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentWriter.java oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentWriter.java index ab762f7..aec06a2 100644 --- oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentWriter.java +++ oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentWriter.java @@ -586,32 +586,76 @@ public class SegmentWriter { } /** - * Write a reference to an external blob. + * Write a reference to an external blob. This method handles blob IDs of + * every length, but behaves differently for small and large blob IDs. * - * @param reference reference - * @return record id + * @param blobId Blob ID. + * @return Record ID pointing to the written blob ID. */ - private synchronized RecordId writeValueRecord(String reference) { - byte[] data = reference.getBytes(Charsets.UTF_8); - int length = data.length; + private RecordId writeBlobId(String blobId) { + byte[] data = blobId.getBytes(Charsets.UTF_8); - // When writing a binary ID, the four most significant bits of the - // length field should be "1110", leaving 12 other bits to store the - // length itself. This means that the values of the length field can - // only range between 0 and 2^12 - 1. + if (data.length < Segment.BLOB_ID_SMALL_LIMIT) { + return writeSmallBlobId(data); + } - checkArgument(length < 4096); + return writeLargeBlobId(blobId); + } - RecordId id = prepare(RecordType.VALUE, 2 + length); - int len = length | 0xE000; - buffer[position++] = (byte) (len >> 8); - buffer[position++] = (byte) len; + /** + * Write a large blob ID. A blob ID is considered large if the length of its + * binary representation is equal to or greater than {@code + * Segment.BLOB_ID_SMALL_LIMIT}. + * + * @param blobId Blob ID. + * @return A record ID pointing to the written blob ID. + */ + private RecordId writeLargeBlobId(String blobId) { + RecordId stringRecord = writeString(blobId); - System.arraycopy(data, 0, buffer, position, length); - position += length; + synchronized (this) { + RecordId blobIdRecord = prepare(RecordType.VALUE, 1, Collections.singletonList(stringRecord)); - blobrefs.add(id); - return id; + // The length uses a fake "length" field that is always equal to 0xF0. + // This allows the code to take apart small from a large blob IDs. + + buffer[position++] = (byte) 0xF0; + writeRecordId(stringRecord); + + blobrefs.add(blobIdRecord); + + return blobIdRecord; + } + } + + /** + * Write a small blob ID. A blob ID is considered small if the length of its + * binary representation is less than {@code Segment.BLOB_ID_SMALL_LIMIT}. + * + * @param blobId Blob ID. + * @return A record ID pointing to the written blob ID. + */ + private RecordId writeSmallBlobId(byte[] blobId) { + int length = blobId.length; + + checkArgument(length < Segment.BLOB_ID_SMALL_LIMIT); + + synchronized (this) { + RecordId id = prepare(RecordType.VALUE, 2 + length); + + int masked = length | 0xE000; + + buffer[position++] = (byte) (masked >> 8); + buffer[position++] = (byte) (masked); + + System.arraycopy(blobId, 0, buffer, position, length); + + position += length; + + blobrefs.add(id); + + return id; + } } /** @@ -780,7 +824,7 @@ public class SegmentWriter { if (reference != null && store.getBlobStore() != null) { String blobId = store.getBlobStore().getBlobId(reference); if (blobId != null) { - RecordId id = writeValueRecord(blobId); + RecordId id = writeBlobId(blobId); return new SegmentBlob(id); } else { log.debug("No blob found for reference {}, inlining...", reference); @@ -791,7 +835,7 @@ public class SegmentWriter { } SegmentBlob writeExternalBlob(String blobId) throws IOException { - RecordId id = writeValueRecord(blobId); + RecordId id = writeBlobId(blobId); return new SegmentBlob(id); } @@ -839,7 +883,7 @@ public class SegmentWriter { } else if (blobStore != null) { String blobId = blobStore.writeBlob(new SequenceInputStream( new ByteArrayInputStream(data, 0, n), stream)); - return writeValueRecord(blobId); + return writeBlobId(blobId); } long length = n; diff --git oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/file/ExternalBlobReferenceTest.java oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/file/ExternalBlobReferenceTest.java index 37f148d..900dc0e 100644 --- oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/file/ExternalBlobReferenceTest.java +++ oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/file/ExternalBlobReferenceTest.java @@ -19,14 +19,6 @@ package org.apache.jackrabbit.oak.plugins.segment.file; -import static org.junit.Assert.assertEquals; -import static org.mockito.Matchers.any; -import static org.mockito.Mockito.doReturn; -import static org.mockito.Mockito.mock; - -import java.io.InputStream; -import java.util.Random; - import com.google.common.base.Strings; import org.apache.jackrabbit.oak.plugins.segment.Segment; import org.apache.jackrabbit.oak.plugins.segment.SegmentBlob; @@ -37,6 +29,14 @@ import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; +import java.io.IOException; +import java.io.InputStream; + +import static org.junit.Assert.assertEquals; +import static org.mockito.Matchers.any; +import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.mock; + public class ExternalBlobReferenceTest { @Rule @@ -59,9 +59,9 @@ public class ExternalBlobReferenceTest { /** * The {@code SegmentWriter} should be able to write blob IDs whose length - * is between 0 and 4095 bytes. It should be possible to correctly read the - * blob ID back and pass it to the {@code BlobStore} to obtain information - * about the blob. + * is between 0 and {@code Segment.BLOB_ID_SMALL_LIMIT - 1} bytes. It should + * be possible to correctly read the blob ID back and pass it to the {@code + * BlobStore} to obtain information about the blob. *

* This code path executes only if the written stream is {@code * Segment.MEDIUM_LIMIT} bytes long (or more). If the length of the stream @@ -71,59 +71,85 @@ public class ExternalBlobReferenceTest { * See OAK-3105. */ @Test - public void testBlobIdLengthUpperLimit() throws Exception { - String blobId = Strings.repeat("x", 4095); - long blobLength = Segment.MEDIUM_LIMIT; - - doReturn(blobId).when(blobStore).writeBlob(any(InputStream.class)); - doReturn(blobLength).when(blobStore).getBlobLength(blobId); - - SegmentBlob blob = fileStore.getTracker().getWriter().writeStream(newRandomInputStream(blobLength)); - - assertEquals(blobLength, blob.length()); + public void testShortBlobId() throws Exception { + testBlobIdWithLength(Segment.BLOB_ID_SMALL_LIMIT - 1); } /** - * If the {@code BlobStore} returns a blob ID whose length is 4096 byes long - * (or more), writing the stream should throw an {@code - * IllegalArgumentException}. + * If the {@code BlobStore} returns a blob ID whose length is {@code + * Segment.BLOB_ID_SMALL_LIMIT} bytes long (or more), writing the stream + * should succeed. In this case, the blob ID is considered a long blob ID + * and an alternate encoding is used. It should be possible to correctly + * read the blob ID back and pass it to the {@code BlobStore} to obtain + * information about the blob. *

* This code path executes only if the written stream is {@code * Segment.MEDIUM_LIMIT} bytes long (or more). If the length of the stream * is smaller, the binary value is inlined in the segment and the {@code * BlobStore} is never called. *

- * See OAK-3105. + * See OAK-3105 and OAK-3107. */ - @Test(expected = IllegalArgumentException.class) - public void testBlobIdLengthLongerThanUpperLimit() throws Exception { - String blobId = Strings.repeat("x", 4096); + @Test + public void testLongBlobId() throws Exception { + testBlobIdWithLength(Segment.BLOB_ID_SMALL_LIMIT); + } + + private void testBlobIdWithLength(int blobIdLength) throws Exception { + String blobId = Strings.repeat("x", blobIdLength); long blobLength = Segment.MEDIUM_LIMIT; doReturn(blobId).when(blobStore).writeBlob(any(InputStream.class)); + doReturn(blobLength).when(blobStore).getBlobLength(blobId); + + SegmentBlob blob = fileStore.getTracker().getWriter().writeStream(newRandomInputStream(blobLength)); + + assertEquals(blobLength, blob.length()); + } + + private InputStream newRandomInputStream(long size) { + return new LimitInputStream(new ConstantInputStream(0), size); + } + + private static class ConstantInputStream extends InputStream { + + private final int value; + + public ConstantInputStream(int value) { + this.value = value; + } + + @Override + public int read() { + return value; + } - fileStore.getTracker().getWriter().writeStream(newRandomInputStream(blobLength)); } - private InputStream newRandomInputStream(final long size) { - return new InputStream() { + private static class LimitInputStream extends InputStream { - private Random random = new Random(); + private final InputStream stream; - private int read = 0; + private final long limit; - @Override - public int read() { - if (read >= size) { - return -1; - } + private long read = 0; - read += 1; + public LimitInputStream(InputStream stream, long limit) { + this.stream = stream; + this.limit = limit; + } - return random.nextInt() & 0xFF; + @Override + public int read() throws IOException { + if (read >= limit) { + return -1; } - }; + read = read + 1; + + return stream.read(); + } + } }