From e6623b3ca25982994c0bd4e1e08f0e3449ded7c2 Mon Sep 17 00:00:00 2001 From: Thomas Mueller Date: Thu, 31 Jul 2014 06:53:54 +0000 Subject: [PATCH 1/7] OAK-1995 Improved SegmentNodeStore documentation (actually: remove unused class) git-svn-id: https://svn.apache.org/repos/asf/jackrabbit/oak/trunk@1614816 13f79535-47bb-0310-9956-ffa450edef68 --- .../jackrabbit/oak/plugins/segment/MergeDiff.java | 85 ---------------------- 1 file changed, 85 deletions(-) delete mode 100644 oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/MergeDiff.java diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/MergeDiff.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/MergeDiff.java deleted file mode 100644 index f2a74c8..0000000 --- a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/MergeDiff.java +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.jackrabbit.oak.plugins.segment; - -import org.apache.jackrabbit.oak.api.PropertyState; -import org.apache.jackrabbit.oak.spi.state.NodeBuilder; -import org.apache.jackrabbit.oak.spi.state.NodeState; -import org.apache.jackrabbit.oak.spi.state.NodeStateDiff; - -public class MergeDiff implements NodeStateDiff { - - private final NodeBuilder builder; - - public MergeDiff(NodeBuilder builder) { - this.builder = builder; - } - - @Override - public boolean propertyAdded(PropertyState after) { - if (!builder.hasProperty(after.getName())) { - builder.setProperty(after); - } - return true; - } - - @Override - public boolean propertyChanged(PropertyState before, PropertyState after) { - PropertyState other = builder.getProperty(before.getName()); - if (other != null && other.equals(before)) { - builder.setProperty(after); - } - return true; - } - - @Override - public boolean propertyDeleted(PropertyState before) { - PropertyState other = builder.getProperty(before.getName()); - if (other != null && other.equals(before)) { - builder.removeProperty(before.getName()); - } - return true; - } - - @Override - public boolean childNodeAdded(String name, NodeState after) { - if (!builder.hasChildNode(name)) { - builder.setChildNode(name, after); - } - return true; - } - - @Override - public boolean childNodeChanged( - String name, NodeState before, NodeState after) { - if (builder.hasChildNode(name)) { - after.compareAgainstBaseState( - before, new MergeDiff(builder.child(name))); - } - return true; - } - - @Override - public boolean childNodeDeleted(String name, NodeState before) { - if (builder.hasChildNode(name) - && before.equals(builder.child(name).getNodeState())) { - builder.getChildNode(name).remove(); - } - return true; - } - -} -- 1.8.4.3 From c6918c0d8c587631acc5f9afddb52144c9b2f89d Mon Sep 17 00:00:00 2001 From: Thomas Mueller Date: Thu, 31 Jul 2014 06:55:35 +0000 Subject: [PATCH 2/7] OAK-1995 Improved SegmentNodeStore documentation git-svn-id: 
https://svn.apache.org/repos/asf/jackrabbit/oak/trunk@1614817 13f79535-47bb-0310-9956-ffa450edef68 Conflicts: oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentId.java oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentNodeState.java oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentNodeStoreService.java oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentTracker.java oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentWriter.java --- .../jackrabbit/oak/plugins/segment/MapRecord.java | 2 + .../oak/plugins/segment/PropertyTemplate.java | 6 +- .../jackrabbit/oak/plugins/segment/RecordId.java | 4 + .../jackrabbit/oak/plugins/segment/RecordType.java | 87 ++++++++++++++++++++++ .../jackrabbit/oak/plugins/segment/Segment.java | 6 ++ .../oak/plugins/segment/SegmentNodeBuilder.java | 6 ++ .../oak/plugins/segment/SegmentNodeStore.java | 3 + .../oak/plugins/segment/SegmentPropertyState.java | 4 + .../oak/plugins/segment/SegmentStore.java | 3 + .../oak/plugins/segment/SegmentStream.java | 3 + .../oak/plugins/segment/SegmentTracker.java | 2 + .../oak/plugins/segment/SegmentWriter.java | 41 ++++++++++ .../jackrabbit/oak/plugins/segment/Template.java | 6 ++ 13 files changed, 172 insertions(+), 1 deletion(-) diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/MapRecord.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/MapRecord.java index 2ea7086..1d86281 100644 --- a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/MapRecord.java +++ b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/MapRecord.java @@ -63,12 +63,14 @@ class MapRecord extends Record { /** * Number of bits needed to indicate the current trie level. + * Currently 4. 
*/ protected static final int LEVEL_BITS = // 4, using nextPowerOfTwo(): numberOfTrailingZeros(highestOneBit(MAX_NUMBER_OF_LEVELS) << 1); /** * Number of bits used to indicate the size of a map. + * Currently 28. */ protected static final int SIZE_BITS = 32 - LEVEL_BITS; diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/PropertyTemplate.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/PropertyTemplate.java index 34f54bf..d89bf74 100644 --- a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/PropertyTemplate.java +++ b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/PropertyTemplate.java @@ -25,6 +25,10 @@ import com.google.common.collect.ComparisonChain; class PropertyTemplate implements Comparable { + /** + * The index of this property within the list of properties in the node + * template. + */ private final int index; private final String name; @@ -39,7 +43,7 @@ class PropertyTemplate implements Comparable { PropertyTemplate(PropertyState state) { checkNotNull(state); - this.index = 0; // TODO: is this used anywhere + this.index = 0; this.name = state.getName(); this.type = state.getType(); } diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/RecordId.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/RecordId.java index 580a732..e59b471 100644 --- a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/RecordId.java +++ b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/RecordId.java @@ -25,6 +25,10 @@ import java.util.UUID; import java.util.regex.Matcher; import java.util.regex.Pattern; +/** + * The record id. This includes the segment id and the offset within the + * segment. 
+ */ public final class RecordId implements Comparable { private static final Pattern PATTERN = Pattern.compile( diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/RecordType.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/RecordType.java index 3fd1b60..814ce98 100644 --- a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/RecordType.java +++ b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/RecordType.java @@ -16,22 +16,109 @@ */ package org.apache.jackrabbit.oak.plugins.segment; +/** + * The type of a record in a segment. + */ enum RecordType { + /** + * A leaf of a map (which is a HAMT tree). This contains + *
    + *
  • the size (int)
  • + *
  • for each entry, the hash code of the key (4 bytes), then the record id of + * the key and the record id of the value
  • + *
+ */ LEAF, + /** + * A branch of a map (which is a HAMT tree). This contains + *
    + *
  • level within the HAMT structure (4 most significant bits), plus size + * of that branch of the map
  • + *
  • bitmap (4 bytes)
  • + *
  • record ids of the buckets of the next level of the map
  • + *
+ * There is a special case: if the first int (level/size) is -1, then it's a + * diff record, to handle the common case of when exactly one existing child + * node was modified. This is common because whenever one node was changed, + * we need to propagate that up to the root. + *
    + *
  • -1 (int)
  • + *
  • hash code of the key that was changed (4 bytes)
  • + *
  • the record id of the key
  • + *
  • the record id of the value
  • + *
  • the record id of the (base version of the) modified map
  • + *
+ * There is only ever one single diff record for a map. + */ BRANCH, + /** + * A bucket (a list of references). It always includes at least 2 elements, + * up to 255 entries (because each entry could in theory point to a + * different segment, in which case this couldn't be stored in a segment). + * This contains just the record ids. The size of the list is not stored, as + * it is stored along with the reference to this record. + */ BUCKET, + /** + * A list including the size (an int). This could be 0, in which case there + * is no reference. If the size is 1, then the reference points to the value of + * the list. If the size is larger, then a record id follows, which points + * to a bucket with the actual record ids. If there are more than 255 + * entries in the list, then the list is partitioned into sublists of 255 + * entries each, which are stored kind of recursively. + */ LIST, + /** + * A short value (for example a string, or a long). The format is: length + * (variable length encoding, one byte if shorter than 128, else more + * bytes), then the data as a byte array, or, for large values, a record id + * of the top level bucket that contains the list of block record ids of the + * actual binary data. + */ VALUE, + /** + * A block of bytes (a binary value, or a part of a binary value, or part of + * large strings). It only contains the raw data. + */ BLOCK, + /** + * A template (the "hidden class" of a node; inspired by the Chrome V8 + * Javascript engine). This includes a list of property templates. Format: + *
    + *
  • head (int), which is: 1 bit (most significant one) whether the node + * has a single valued jcr:primaryType property. 1 bit whether it has + * mixins, in which case 10 bits (27 to 18) are used for the number of + * mixins. 1 bit whether the node has no child nodes. 1 bit whether the node + * has more than one child node. 18 bits (0 to 17) the number of properties + * (0 to 262143).
  • + *
  • The record ids of: if needed, record id of the primary type (a + * value), record ids of the mixin names (value records), for single child + * node: the name of the child node
  • + *
  • The list of record ids of property names (which are stored before the + * template in separate value records), and the property type (negative + * values for multi-value properties).
  • + *
+ */ TEMPLATE, + /** + * A JCR node, which contains a list of record ids: + *
    + *
  • the record id of the template
  • + *
  • depending on the template, the record id of the map of the ids of the + * child node name(s) and child node record id(s), or if there is just one + * child node, the child node record id
  • + *
  • the record ids of the property values (for a multi-valued property, a + * pointer to the list record)
  • + *
+ */ NODE } diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/Segment.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/Segment.java index a96c019..e2e7e36 100644 --- a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/Segment.java +++ b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/Segment.java @@ -43,6 +43,12 @@ import org.apache.jackrabbit.oak.plugins.blob.ReferenceCollector; import org.apache.jackrabbit.oak.plugins.memory.PropertyStates; import org.jclouds.javax.annotation.Nullable; +/** + * A list of records. + *

+ * Record data is not kept in memory, but some entries are cached (templates, + * all strings in the segment). + */ public class Segment { /** diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentNodeBuilder.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentNodeBuilder.java index 7748034..1e1e5cf 100644 --- a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentNodeBuilder.java +++ b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentNodeBuilder.java @@ -23,6 +23,12 @@ import org.apache.jackrabbit.oak.api.Blob; import org.apache.jackrabbit.oak.plugins.memory.MemoryNodeBuilder; import org.apache.jackrabbit.oak.spi.state.NodeState; +/** + * A node builder that keeps track of the number of updates + * (set property calls and so on). If there are too many updates, + * getNodeState() is called, which will write the records to the segment, + * and that might persist the changes (if the segment is flushed). + */ public class SegmentNodeBuilder extends MemoryNodeBuilder { /** diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentNodeStore.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentNodeStore.java index c4cb574..d50e329 100644 --- a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentNodeStore.java +++ b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentNodeStore.java @@ -59,6 +59,9 @@ import org.apache.jackrabbit.oak.spi.state.NodeStore; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +/** + * The top level class for the segment store. 
+ */ public class SegmentNodeStore implements NodeStore, Observable { private static final Logger log = LoggerFactory diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentPropertyState.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentPropertyState.java index a10b4cf..759ef29 100644 --- a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentPropertyState.java +++ b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentPropertyState.java @@ -49,6 +49,10 @@ import static org.apache.jackrabbit.oak.api.Type.STRING; import static org.apache.jackrabbit.oak.api.Type.URI; import static org.apache.jackrabbit.oak.api.Type.WEAKREFERENCE; +/** + * A property, which can read a value or list record from a segment. + * It currently doesn't cache data. + */ public class SegmentPropertyState extends Record implements PropertyState { private final PropertyTemplate template; diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentStore.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentStore.java index 95ab46d..28ba0cd 100644 --- a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentStore.java +++ b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentStore.java @@ -22,6 +22,9 @@ import javax.annotation.Nonnull; import org.apache.jackrabbit.oak.api.Blob; import org.apache.jackrabbit.oak.spi.blob.BlobStore; +/** + * The backend storage interface used by the segment node store. 
+ */ public interface SegmentStore { SegmentTracker getTracker(); diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentStream.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentStream.java index 09bc1bc..55671ce 100644 --- a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentStream.java +++ b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentStream.java @@ -31,6 +31,9 @@ import javax.annotation.CheckForNull; import com.google.common.base.Charsets; import com.google.common.io.ByteStreams; +/** + * For reading any value records as binary streams. + */ public class SegmentStream extends InputStream { @CheckForNull diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentTracker.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentTracker.java index 8b63d0c..0961ddb 100644 --- a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentTracker.java +++ b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentTracker.java @@ -40,6 +40,8 @@ import org.slf4j.LoggerFactory; /** * Tracker of references to segment identifiers and segment instances * that are currently kept in memory. + *

+ * It is also responsible for caching segment objects in memory. */ public class SegmentTracker { diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentWriter.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentWriter.java index 42ce991..8ec678d 100644 --- a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentWriter.java +++ b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentWriter.java @@ -72,6 +72,9 @@ import org.apache.jackrabbit.oak.spi.state.NodeState; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +/** + * Converts records to byte arrays, in order to create segments. + */ public class SegmentWriter { /** Logger instance */ @@ -163,6 +166,11 @@ public class SegmentWriter { segment.getSegmentId().setSegment(segment); } + /** + * Adds a segment header to the buffer and writes a segment to the segment + * store. This is done automatically (called from prepare) when there is not + * enough space for a record. It can also be called explicitly. + */ public synchronized void flush() { if (length > 0) { int refcount = segment.getRefCount(); @@ -239,6 +247,23 @@ public class SegmentWriter { return prepare(type, size, Collections.emptyList()); } + /** + * Before writing a record (records are written backwards, from the end of the + * file to the beginning), this method is called, to ensure there is enough + * space. A new segment is also created if there is not enough space in the + * segment lookup table or elsewhere. + *

+ * This method does not actually write into the segment, just allocates the + * space (flushing the segment if needed and starting a new one), and sets + * the write position (records are written from the end to the beginning, + * but within a record from left to right). + * + * @param type the record type (only used for root records) + * @param size the size of the record, excluding the size used for the + * record ids + * @param ids the record ids + * @return a new record id + */ private RecordId prepare( RecordType type, int size, Collection ids) { checkArgument(size >= 0); @@ -310,6 +335,12 @@ public class SegmentWriter { return refcount; } + /** + * Write a record id, and marks the record id as referenced (removes it from + * the unreferenced set). + * + * @param recordId the record id + */ private synchronized void writeRecordId(RecordId recordId) { checkNotNull(recordId); roots.remove(recordId); @@ -954,6 +985,8 @@ public class SegmentWriter { RecordId[] propertyNames = new RecordId[properties.length]; byte[] propertyTypes = new byte[properties.length]; for (int i = 0; i < properties.length; i++) { + // Note: if the property names are stored in more than 255 separate + // segments, this will not work. propertyNames[i] = writeString(properties[i].getName()); Type type = properties[i].getType(); if (type.isArray()) { @@ -1007,6 +1040,14 @@ public class SegmentWriter { return id; } + /** + * If the given node was compacted, return the compacted node, otherwise + * return the passed node. This is to avoid pointing to old nodes, if they + * have been compacted. 
+ * + * @param state the node + * @return the compacted node (if it was compacted) + */ private SegmentNodeState uncompact(SegmentNodeState state) { RecordId id = tracker.getCompactionMap().get(state.getRecordId()); if (id != null) { diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/Template.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/Template.java index 9e38a85..e3643b8 100644 --- a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/Template.java +++ b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/Template.java @@ -40,6 +40,12 @@ import org.apache.jackrabbit.oak.plugins.memory.MemoryChildNodeEntry; import org.apache.jackrabbit.oak.spi.state.ChildNodeEntry; import org.apache.jackrabbit.oak.spi.state.NodeState; +/** + * The in-memory representation of a "hidden class" of a node (inspired by the + * Chrome V8 Javascript engine). + *

+ * Templates are always read fully in-memory. + */ public class Template { static final String ZERO_CHILD_NODES = null; -- 1.8.4.3 From 0e15478ff5f913afc6db44edcd1de2ad15f0ff2d Mon Sep 17 00:00:00 2001 From: Jukka Zitting Date: Thu, 31 Jul 2014 07:53:02 +0000 Subject: [PATCH 3/7] OAK-1995: Improved SegmentNodeStore documentation Add a warning about the complex internals of MemoryNodeBuilder git-svn-id: https://svn.apache.org/repos/asf/jackrabbit/oak/trunk@1614818 13f79535-47bb-0310-9956-ffa450edef68 Conflicts: oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/memory/MemoryNodeBuilder.java --- .../jackrabbit/oak/plugins/memory/MemoryNodeBuilder.java | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/memory/MemoryNodeBuilder.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/memory/MemoryNodeBuilder.java index 1d95085..6c382ce 100644 --- a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/memory/MemoryNodeBuilder.java +++ b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/memory/MemoryNodeBuilder.java @@ -16,6 +16,16 @@ */ package org.apache.jackrabbit.oak.plugins.memory; +// WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! // +// // +// This class has complex internals that have evolved in over a hundred // +// commits. It is a central component in how Oak handles modifications to // +// content trees. Please use 'svn blame', 'svn log' and the referenced // +// Jira issues to understand the reason for some of the more complex parts // +// of this class. See also the MemoryNodeBuilderTest for existing tests. // +// // +// WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! 
// + import static com.google.common.base.Objects.toStringHelper; import static com.google.common.base.Preconditions.checkNotNull; import static com.google.common.base.Preconditions.checkState; -- 1.8.4.3 From 6e9680fb23220cf111e58e9d967c89de79785452 Mon Sep 17 00:00:00 2001 From: Jukka Zitting Date: Thu, 31 Jul 2014 07:53:34 +0000 Subject: [PATCH 4/7] OAK-1995: Improved SegmentNodeStore documentation Add some TODOs for areas that could do with extra documentation git-svn-id: https://svn.apache.org/repos/asf/jackrabbit/oak/trunk@1614819 13f79535-47bb-0310-9956-ffa450edef68 Conflicts: oak-doc/src/site/markdown/segmentmk.md --- oak-doc/src/site/markdown/segmentmk.md | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/oak-doc/src/site/markdown/segmentmk.md b/oak-doc/src/site/markdown/segmentmk.md index a142a6c..3fd5a8d 100644 --- a/oak-doc/src/site/markdown/segmentmk.md +++ b/oak-doc/src/site/markdown/segmentmk.md @@ -20,7 +20,8 @@ SegmentMK design overview The SegmentMK is an Oak storage backend that stores content as various types of *records* within larger *segments*. One or more *journals* are -used to track the latest state of the repository. +used to track the latest state of the repository. In the TarMK implementation +only one "root" journal is used. The SegmentMK was designed from the ground up based on the following key principles: @@ -107,8 +108,13 @@ The segment header consists of the following fields: | Root record references (rootcount x 3 bytes) | | | | ...... +--------+--------+--------+ - | | - +--------+--------+--------+--------+--------+ + | | | + +--------+--------+--------+--------+--------+ + + | External blob record references (blobrefcount x 2 bytes) | + | | + | ...... 
+--------+--------+--------+ + | | padding (set to 0) | + +--------+--------+--------+--------+--------+--------+--------+--------+ The first three bytes of a segment always contain the ASCII string "0aK", which is intended to make the binary segment data format easily detectable. @@ -137,7 +143,7 @@ Journals are special, atomically updated documents that record the state of the repository as a sequence of references to successive root node records. -A small system could consist of just a single journal and would +A small system (like TarMK) could use just a single journal and would serialize all repository updates through atomic updates of that journal. A larger system that needs more write throughput can have more journals, linked to each other in a tree hierarchy. Commits to journals in lower @@ -225,6 +231,8 @@ The result is a hierarchically stored immutable map where each element can be accessed in O(log N) time and the size overhead of updating or inserting list elements is also O(log N). +TODO: Links to HAMT documentation + Value records ------------- @@ -304,3 +312,18 @@ and child nodes. This way a node can become arbitrarily large and still remain reasonably efficient to access and modify. The main downside of this alternative storage layout is that the ordering of child nodes is lost. 
+ +TarMK +===== + +TODO: + +- tar entry checksums +- graph and index entries +- recovery mechanism +- tar generations / cleanup +- journal.log +- compaction +- cleanup +- backup +- slow startup / journal.log -- 1.8.4.3 From 692b48601ace6b0f699f591b2897f617c85c5480 Mon Sep 17 00:00:00 2001 From: Jukka Zitting Date: Thu, 31 Jul 2014 07:54:18 +0000 Subject: [PATCH 5/7] OAK-1995: Improved SegmentNodeStore documentation Document the magic constants used in MapRecord git-svn-id: https://svn.apache.org/repos/asf/jackrabbit/oak/trunk@1614820 13f79535-47bb-0310-9956-ffa450edef68 --- .../java/org/apache/jackrabbit/oak/plugins/segment/MapRecord.java | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/MapRecord.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/MapRecord.java index 1d86281..d359c99 100644 --- a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/MapRecord.java +++ b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/MapRecord.java @@ -37,10 +37,18 @@ import com.google.common.collect.ComparisonChain; class MapRecord extends Record { + /** + * Magic constant from a random number generator, used to generate + * good hash values. + */ private static final int M = 0xDEECE66D; private static final int A = 0xB; static final long HASH_MASK = 0xFFFFFFFFL; + /** + * Generates a hash code for the value, using a random number generator + * to improve the distribution of the hash values. 
+ */ static int getHash(String name) { return (name.hashCode() ^ M) * M + A; } -- 1.8.4.3 From 24d4228659dabd5f7805666b1e6d3b4075075132 Mon Sep 17 00:00:00 2001 From: Thomas Mueller Date: Thu, 31 Jul 2014 10:50:39 +0000 Subject: [PATCH 6/7] OAK-1995 Improved SegmentNodeStore documentation git-svn-id: https://svn.apache.org/repos/asf/jackrabbit/oak/trunk@1614856 13f79535-47bb-0310-9956-ffa450edef68 Conflicts: oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/BlockRecord.java oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/ListRecord.java oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentIdTable.java oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/FileStore.java oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarReader.java oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarWriter.java --- .../oak/plugins/segment/BlockRecord.java | 3 + .../jackrabbit/oak/plugins/segment/ListRecord.java | 3 + .../jackrabbit/oak/plugins/segment/MapRecord.java | 4 ++ .../oak/plugins/segment/PropertyTemplate.java | 4 ++ .../jackrabbit/oak/plugins/segment/RecordType.java | 6 +- .../jackrabbit/oak/plugins/segment/Segment.java | 2 + .../oak/plugins/segment/SegmentBlob.java | 5 +- .../jackrabbit/oak/plugins/segment/SegmentId.java | 4 +- .../oak/plugins/segment/SegmentNodeStore.java | 3 + .../oak/plugins/segment/SegmentPropertyState.java | 7 +- .../oak/plugins/segment/SegmentStream.java | 2 +- .../oak/plugins/segment/SegmentWriter.java | 8 ++- .../oak/plugins/segment/file/BackgroundThread.java | 7 ++ .../oak/plugins/segment/file/FileAccess.java | 35 +++------- .../oak/plugins/segment/file/FileBlob.java | 78 ---------------------- .../oak/plugins/segment/file/FileStore.java | 3 + .../oak/plugins/segment/file/TarEntry.java | 4 ++ .../oak/plugins/segment/file/TarReader.java | 23 +++++++ .../oak/plugins/segment/file/TarWriter.java | 36 +++++++++- 
.../oak/plugins/segment/memory/MemoryStore.java | 3 + 20 files changed, 127 insertions(+), 113 deletions(-) delete mode 100644 oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/FileBlob.java diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/BlockRecord.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/BlockRecord.java index 9eeb2fe..8a8ebeb 100644 --- a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/BlockRecord.java +++ b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/BlockRecord.java @@ -20,6 +20,9 @@ import static com.google.common.base.Preconditions.checkElementIndex; import static com.google.common.base.Preconditions.checkNotNull; import static com.google.common.base.Preconditions.checkPositionIndexes; +/** + * A record of type "BLOCK". + */ class BlockRecord extends Record { private final int size; diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/ListRecord.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/ListRecord.java index cd3b20d..3dffc94 100644 --- a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/ListRecord.java +++ b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/ListRecord.java @@ -25,6 +25,9 @@ import static java.util.Collections.singletonList; import java.util.List; +/** + * A record of type "LIST". 
+ */ class ListRecord extends Record { static final int LEVEL_SIZE = Segment.SEGMENT_REFERENCE_LIMIT; diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/MapRecord.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/MapRecord.java index d359c99..8b816b9 100644 --- a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/MapRecord.java +++ b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/MapRecord.java @@ -35,6 +35,10 @@ import org.apache.jackrabbit.oak.spi.state.NodeStateDiff; import com.google.common.base.Objects; import com.google.common.collect.ComparisonChain; +/** + * A map. The top level record is either a record of type "BRANCH" or "LEAF" + * (depending on the data). + */ class MapRecord extends Record { /** diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/PropertyTemplate.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/PropertyTemplate.java index d89bf74..572a248 100644 --- a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/PropertyTemplate.java +++ b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/PropertyTemplate.java @@ -23,6 +23,10 @@ import org.apache.jackrabbit.oak.api.Type; import com.google.common.collect.ComparisonChain; +/** + * A property definition within a template (the property name, the type, and the + * index within the list of properties for the given node). 
+ */ class PropertyTemplate implements Comparable { /** diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/RecordType.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/RecordType.java index 814ce98..260b43d 100644 --- a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/RecordType.java +++ b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/RecordType.java @@ -74,11 +74,13 @@ enum RecordType { LIST, /** - * A short value (for example a string, or a long). The format is: length - * (variable length encoding, one byte if shorter than 128, else more + * A value (for example a string, or a long, or a blob). The format is: + * length (variable length encoding, one byte if shorter than 128, else more * bytes), then the data as a byte array, or, for large values, a record id * of the top level bucket that contains the list of block record ids of the * actual binary data. + *

+ * Therefore, a value can reference other records. */ VALUE, diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/Segment.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/Segment.java index e2e7e36..5dbda95 100644 --- a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/Segment.java +++ b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/Segment.java @@ -48,6 +48,8 @@ import org.jclouds.javax.annotation.Nullable; *

* Record data is not kept in memory, but some entries are cached (templates, * all strings in the segment). + *

+ * This class includes methods to read records from the raw bytes. */ public class Segment { diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentBlob.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentBlob.java index 3afe2ed..ed86278 100644 --- a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentBlob.java +++ b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentBlob.java @@ -35,6 +35,9 @@ import org.apache.jackrabbit.oak.api.Blob; import org.apache.jackrabbit.oak.plugins.memory.AbstractBlob; import org.apache.jackrabbit.oak.spi.blob.BlobStore; +/** + * A BLOB (stream of bytes). This is a record of type "VALUE". + */ public class SegmentBlob extends Record implements Blob { public static Iterable getBulkSegmentIds(Blob blob) { @@ -122,7 +125,7 @@ public class SegmentBlob extends Record implements Blob { getStore().getBlobStore(); if (blobStore != null) { return blobStore.getReference(blobId); - }else{ + } else { throw new IllegalStateException("Attempt to read external blob with blobId [" + blobId + "] " + "without specifying BlobStore"); } diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentId.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentId.java index 27c9714..1e5156d 100644 --- a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentId.java +++ b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentId.java @@ -25,7 +25,9 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** - * Segment identifier. + * Segment identifier. There are two types of segments: data segments, and bulk + * segments. Data segments have a header and may reference other segments; bulk + * segments do not. 
*/ public class SegmentId implements Comparable { diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentNodeStore.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentNodeStore.java index d50e329..cdd486b 100644 --- a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentNodeStore.java +++ b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentNodeStore.java @@ -61,6 +61,9 @@ import org.slf4j.LoggerFactory; /** * The top level class for the segment store. + *

+ * The root node of the JCR content tree is actually stored in the node "/root", + * and checkpoints are stored under "/checkpoints". */ public class SegmentNodeStore implements NodeStore, Observable { diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentPropertyState.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentPropertyState.java index 759ef29..0ece866 100644 --- a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentPropertyState.java +++ b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentPropertyState.java @@ -50,8 +50,11 @@ import static org.apache.jackrabbit.oak.api.Type.URI; import static org.apache.jackrabbit.oak.api.Type.WEAKREFERENCE; /** - * A property, which can read a value or list record from a segment. - * It currently doesn't cache data. + * A property, which can read a value or list record from a segment. It + * currently doesn't cache data. + *

+ * Depending on the property type, this is a record of type "VALUE" or a record + * of type "LIST" (for arrays). */ public class SegmentPropertyState extends Record implements PropertyState { diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentStream.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentStream.java index 55671ce..a38a99e 100644 --- a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentStream.java +++ b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentStream.java @@ -32,7 +32,7 @@ import com.google.common.base.Charsets; import com.google.common.io.ByteStreams; /** - * For reading any value records as binary streams. + * For reading any record of type "VALUE" as binary streams. */ public class SegmentStream extends InputStream { diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentWriter.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentWriter.java index 8ec678d..74a761d 100644 --- a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentWriter.java +++ b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentWriter.java @@ -73,7 +73,11 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** - * Converts records to byte arrays, in order to create segments. + * Converts nodes, properties, and values to records, which are written to a + * byte array, in order to create segments. + *

+ * The same writer is used to create multiple segments (data is automatically + * split: new segments are automatically created if and when needed). */ public class SegmentWriter { @@ -824,7 +828,7 @@ public class SegmentWriter { return new SegmentBlob(id); } - public synchronized void dropCache(){ + public synchronized void dropCache() { records.clear(); } diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/BackgroundThread.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/BackgroundThread.java index a293585..07702c3 100644 --- a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/BackgroundThread.java +++ b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/BackgroundThread.java @@ -23,6 +23,13 @@ import java.util.Date; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +/** + * A small wrapper around the Thread class that periodically calls a runnable. + * Please note the Runnable.run() method is not supposed to loop itself, instead + * it should just do one operation. This class calls Runnable.run() repeatedly. + * This class also measures and logs the time taken by the Runnable.run() + * method. + */ class BackgroundThread extends Thread { /** Logger instance */ diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/FileAccess.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/FileAccess.java index 605f119..2cb949e 100644 --- a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/FileAccess.java +++ b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/FileAccess.java @@ -23,22 +23,26 @@ import java.io.IOException; import java.io.RandomAccessFile; import java.nio.ByteBuffer; import java.nio.MappedByteBuffer; -import java.util.zip.CRC32; +/** + * A wrapper around either memory mapped files or random access files, to allow + * reading from a file. 
+ */ abstract class FileAccess { abstract boolean isMemoryMapped(); abstract int length() throws IOException; - abstract long crc32(int position, int size) throws IOException; - abstract ByteBuffer read(int position, int length) throws IOException; abstract void close() throws IOException; //-----------------------------------------------------------< private >-- + /** + * The implementation that uses memory mapped files. + */ static class Mapped extends FileAccess { private final MappedByteBuffer buffer; @@ -58,19 +62,6 @@ abstract class FileAccess { } @Override - public long crc32(int position, int length) { - ByteBuffer entry = buffer.asReadOnlyBuffer(); - entry.position(entry.position() + position); - - byte[] data = new byte[length]; - entry.get(data); - - CRC32 checksum = new CRC32(); - checksum.update(data); - return checksum.getValue(); - } - - @Override public ByteBuffer read(int position, int length) { ByteBuffer entry = buffer.asReadOnlyBuffer(); entry.position(entry.position() + position); @@ -83,7 +74,10 @@ abstract class FileAccess { } } - + + /** + * The implementation that uses random access file (reads are synchronized). 
+ */ static class Random extends FileAccess { private final RandomAccessFile file; @@ -105,13 +99,6 @@ abstract class FileAccess { } @Override - public long crc32(int position, int length) throws IOException { - CRC32 checksum = new CRC32(); - checksum.update(read(position, length).array()); - return checksum.getValue(); - } - - @Override public synchronized ByteBuffer read(int position, int length) throws IOException { ByteBuffer entry = ByteBuffer.allocate(length); diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/FileBlob.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/FileBlob.java deleted file mode 100644 index 87f5735..0000000 --- a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/FileBlob.java +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.jackrabbit.oak.plugins.segment.file; - -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStream; - -import javax.annotation.Nonnull; - -import org.apache.jackrabbit.oak.api.Blob; - -public class FileBlob implements Blob { - - private final String path; - - public FileBlob(String path) { - this.path = path; - } - - @Override - public String getReference() { - return path; // FIXME: should be a secure reference - } - - @Override - public String getContentIdentity() { - return null; - } - - @Nonnull - @Override - public InputStream getNewStream() { - try { - return new FileInputStream(getFile()); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - @Override - public long length() { - return getFile().length(); - } - - private File getFile() { - return new File(path); - } - - @Override - public boolean equals(Object obj) { - if (obj instanceof FileBlob) { - FileBlob other = (FileBlob) obj; - return this.path.equals(other.path); - } - return super.equals(obj); - } - - @Override - public int hashCode() { - return path.hashCode(); - } -} diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/FileStore.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/FileStore.java index ca4bbcd..44722f6 100644 --- a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/FileStore.java +++ b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/FileStore.java @@ -80,6 +80,9 @@ import org.apache.jackrabbit.oak.spi.state.NodeState; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +/** + * The storage implementation for tar files. 
+ */ public class FileStore implements SegmentStore { /** Logger instance */ diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarEntry.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarEntry.java index 7b11979..4d7e7dd 100644 --- a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarEntry.java +++ b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarEntry.java @@ -18,6 +18,10 @@ package org.apache.jackrabbit.oak.plugins.segment.file; import java.util.Comparator; +/** + * A file entry location in a tar file. This is used for the index with a tar + * file. + */ class TarEntry { static final Comparator OFFSET_ORDER = new Comparator() { diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarReader.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarReader.java index 219d2e9..d17c7be 100644 --- a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarReader.java +++ b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarReader.java @@ -533,6 +533,17 @@ class TarReader { return findEntry(msb, lsb) != -1; } + /** + * If the given segment is in this file, get the byte buffer that allows + * reading it. + *

+ * Whether or not this will read from the file depends on whether memory + * mapped files are used or not. + * + * @param msb the most significant bits of the segment id + * @param lsb the least significant bits of the segment id + * @return the byte buffer, or null if not in this file + */ ByteBuffer readEntry(long msb, long lsb) throws IOException { int position = findEntry(msb, lsb); if (position != -1) { @@ -544,6 +555,14 @@ class TarReader { } } + /** + * Find the position of the given segment in the tar file. + * It uses the tar index if available. + * + * @param msb the most significant bits of the segment id + * @param lsb the least significant bits of the segment id + * @return the position in the file, or -1 if not found + */ private int findEntry(long msb, long lsb) { // The segment identifiers are randomly generated with uniform // distribution, so we can use interpolation search to find the @@ -790,6 +809,10 @@ class TarReader { return number; } + File getFile() { + return file; + } + //------------------------------------------------------------< Object >-- @Override diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarWriter.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarWriter.java index 9cdb697..e8d8572 100644 --- a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarWriter.java +++ b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarWriter.java @@ -48,16 +48,48 @@ import com.google.common.collect.Lists; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +/** + * A writer for tar files. It is also used to read entries while the file is + * still open. + */ class TarWriter { /** Logger instance */ private static final Logger log = LoggerFactory.getLogger(TarWriter.class); - /** Magic byte sequence at the end of the index block. */ + /** + * Magic byte sequence at the end of the index block. + *

+ *

    + *
  • For each segment in that file, an index entry that contains the UUID, + * the offset within the file and the size of the segment. Sorted by UUID, + * to allow using interpolation search.
  • + *
  • + * The index footer, which contains metadata of the index (the size, + * checksum).
  • + *
+ */ static final int INDEX_MAGIC = ('\n' << 24) + ('0' << 16) + ('K' << 8) + '\n'; - /** Magic byte sequence at the end of the graph block. */ + /** + * Magic byte sequence at the end of the graph block. + *

+ * The tar file is read from the end: the last entry is the index, then + * the graph. File format: + *

    + *
  • 0 padding to make the footer end at a 512 byte boundary
  • + *
  • The list of UUIDs (segments included in the graph; this includes + * segments in this tar file, and referenced segments in tar files with a + * lower sequence number). 16 bytes each.
  • + *
  • The graph data. The index of the source segment UUID (in the above + * list, 4 bytes), then the list of referenced segments (the indexes of + * those; 4 bytes each). Then the list is terminated by -1.
  • + *
  • The last part is the footer, which contains metadata of the graph + * (size, checksum, the number of UUIDs).
  • + *
+ * + */ static final int GRAPH_MAGIC = ('\n' << 24) + ('0' << 16) + ('G' << 8) + '\n'; diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/memory/MemoryStore.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/memory/MemoryStore.java index d8a0f32..7137aef 100644 --- a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/memory/MemoryStore.java +++ b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/memory/MemoryStore.java @@ -37,6 +37,9 @@ import org.apache.jackrabbit.oak.spi.state.NodeState; import com.google.common.collect.Maps; +/** + * A store used for in-memory operations. + */ public class MemoryStore implements SegmentStore { private final SegmentTracker tracker = new SegmentTracker(this); -- 1.8.4.3 From 587db80e715abe35f41e036faf9234a06ecd07e4 Mon Sep 17 00:00:00 2001 From: Thomas Mueller Date: Thu, 31 Jul 2014 12:16:10 +0000 Subject: [PATCH 7/7] OAK-1995 Improved SegmentNodeStore documentation (forgot to add that file) git-svn-id: https://svn.apache.org/repos/asf/jackrabbit/oak/trunk@1614867 13f79535-47bb-0310-9956-ffa450edef68 --- .../oak/plugins/segment/file/FileBlob.java | 82 ++++++++++++++++++++++ 1 file changed, 82 insertions(+) create mode 100644 oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/file/FileBlob.java diff --git a/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/file/FileBlob.java b/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/file/FileBlob.java new file mode 100644 index 0000000..a389719 --- /dev/null +++ b/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/file/FileBlob.java @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.jackrabbit.oak.plugins.segment.file; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; + +import javax.annotation.Nonnull; + +import org.apache.jackrabbit.oak.api.Blob; + +/** + * A blob as a file in the file system. + * Used for testing. + */ +public class FileBlob implements Blob { + + private final String path; + + public FileBlob(String path) { + this.path = path; + } + + @Override + public String getReference() { + return path; // FIXME: should be a secure reference + } + + @Override + public String getContentIdentity() { + return null; + } + + @Nonnull + @Override + public InputStream getNewStream() { + try { + return new FileInputStream(getFile()); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + @Override + public long length() { + return getFile().length(); + } + + private File getFile() { + return new File(path); + } + + @Override + public boolean equals(Object obj) { + if (obj instanceof FileBlob) { + FileBlob other = (FileBlob) obj; + return this.path.equals(other.path); + } + return super.equals(obj); + } + + @Override + public int hashCode() { + return path.hashCode(); + } +} -- 1.8.4.3