Index: oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/backup/BinaryAwareApplyDiff.java =================================================================== --- oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/backup/BinaryAwareApplyDiff.java (revision 0) +++ oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/backup/BinaryAwareApplyDiff.java (revision 0) @@ -0,0 +1,177 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.jackrabbit.oak.plugins.backup; + +import static com.google.common.collect.Lists.newArrayList; +import static com.google.common.collect.Maps.newHashMap; +import static org.apache.jackrabbit.oak.api.Type.BINARIES; +import static org.apache.jackrabbit.oak.api.Type.BINARY; + +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +import org.apache.jackrabbit.oak.api.Blob; +import org.apache.jackrabbit.oak.api.PropertyState; +import org.apache.jackrabbit.oak.api.Type; +import org.apache.jackrabbit.oak.commons.IOUtils; +import org.apache.jackrabbit.oak.plugins.memory.BinaryPropertyState; +import org.apache.jackrabbit.oak.plugins.memory.EmptyNodeState; +import org.apache.jackrabbit.oak.plugins.memory.MultiBinaryPropertyState; +import org.apache.jackrabbit.oak.plugins.segment.RecordId; +import org.apache.jackrabbit.oak.plugins.segment.Segment; +import org.apache.jackrabbit.oak.plugins.segment.SegmentBlob; +import org.apache.jackrabbit.oak.plugins.segment.SegmentWriter; +import org.apache.jackrabbit.oak.spi.state.ApplyDiff; +import org.apache.jackrabbit.oak.spi.state.NodeBuilder; +import org.apache.jackrabbit.oak.spi.state.NodeState; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.hash.Hashing; + +public class BinaryAwareApplyDiff extends ApplyDiff { + + private static final Logger log = LoggerFactory + .getLogger(BinaryAwareApplyDiff.class); + + private final SegmentWriter writer; + + /** + * Map from {@link #getBlobKey(Blob) blob keys} to matching compacted blob + * record identifiers. Used to de-duplicate copies of the same binary + * values. 
+ */ + private final Map<String, List<RecordId>> binaries; + + public BinaryAwareApplyDiff(NodeBuilder builder, SegmentWriter writer) { + super(builder); + this.writer = writer; + this.binaries = newHashMap(); + } + + BinaryAwareApplyDiff(BinaryAwareApplyDiff parent, NodeBuilder builder) { + super(builder); + this.writer = parent.writer; + this.binaries = parent.binaries; + } + + @Override + public boolean propertyAdded(PropertyState after) { + return super.propertyAdded(process(after)); + } + + @Override + public boolean propertyChanged(PropertyState before, PropertyState after) { + return super.propertyChanged(before, process(after)); + } + + @Override + public boolean childNodeAdded(String name, NodeState after) { + return after.compareAgainstBaseState(EmptyNodeState.EMPTY_NODE, + new BinaryAwareApplyDiff(this, builder.child(name))); + } + + @Override + public boolean childNodeChanged(String name, NodeState before, + NodeState after) { + return after.compareAgainstBaseState(before, new BinaryAwareApplyDiff( + this, builder.getChildNode(name))); + } + + private PropertyState process(PropertyState property) { + Type<?> type = property.getType(); + if (type == BINARY) { + String name = property.getName(); + Blob blob = process(property.getValue(Type.BINARY)); + return BinaryPropertyState.binaryProperty(name, blob); + } else if (type == BINARIES) { + String name = property.getName(); + List<Blob> blobs = new ArrayList<Blob>(); + for (Blob blob : property.getValue(BINARIES)) { + Blob b = process(blob); + blobs.add(b); + } + return MultiBinaryPropertyState.binaryPropertyFromBlob(name, blobs); + } + return property; + } + + /** + * Compacts (and de-duplicates) the given blob. 
+ * + * @param blob + * blob to be compacted + * @return compacted blob + */ + private Blob process(Blob blob) { + if (blob instanceof SegmentBlob) { + SegmentBlob sb = (SegmentBlob) blob; + + try { + // if the blob is inlined or external, just clone it + if (sb.isExternal() || sb.length() < Segment.MEDIUM_LIMIT) { + return sb.clone(writer); + } + + // look if the exact same binary has been cloned + String key = getBlobKey(blob); + List<RecordId> ids = binaries.get(key); + if (ids != null) { + for (RecordId duplicateId : ids) { + if (new SegmentBlob(duplicateId).equals(blob)) { + return new SegmentBlob(duplicateId); + } + } + } + + // if not, clone the blob and keep track of the result + sb = sb.clone(writer); + if (ids == null) { + ids = newArrayList(); + binaries.put(key, ids); + } + ids.add(sb.getRecordId()); + + return sb; + } catch (IOException e) { + log.warn("Failed to process a blob", e); + // fall through + } + } + + // no way to compact this blob, so we'll just keep it as-is + log.debug("Not a segment blob, keeping it as-is"); + + return blob; + } + + private static String getBlobKey(Blob blob) throws IOException { + InputStream stream = blob.getNewStream(); + try { + byte[] buffer = new byte[SegmentWriter.BLOCK_SIZE]; + int n = IOUtils.readFully(stream, buffer, 0, buffer.length); + return blob.length() + ":" + Hashing.sha1().hashBytes(buffer, 0, n); + } finally { + stream.close(); + } + } + +} Index: oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/backup/FileStoreBackup.java =================================================================== --- oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/backup/FileStoreBackup.java (revision 1606853) +++ oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/backup/FileStoreBackup.java (working copy) @@ -22,10 +22,12 @@ import java.io.IOException; import java.util.concurrent.TimeUnit; +import org.apache.jackrabbit.oak.plugins.memory.EmptyNodeState; import 
org.apache.jackrabbit.oak.plugins.segment.SegmentNodeBuilder; import org.apache.jackrabbit.oak.plugins.segment.SegmentNodeState; +import org.apache.jackrabbit.oak.plugins.segment.SegmentNodeStore; +import org.apache.jackrabbit.oak.plugins.segment.SegmentWriter; import org.apache.jackrabbit.oak.plugins.segment.file.FileStore; -import org.apache.jackrabbit.oak.spi.state.ApplyDiff; import org.apache.jackrabbit.oak.spi.state.NodeState; import org.apache.jackrabbit.oak.spi.state.NodeStore; import org.slf4j.Logger; @@ -54,26 +56,28 @@ // 2. init filestore FileStore backup = new FileStore(destination, MAX_FILE_SIZE, false); + SegmentWriter writer = backup.getTracker().getWriter(); try { SegmentNodeState state = backup.getHead(); SegmentNodeBuilder builder = state.builder(); + NodeState before = null; String beforeCheckpoint = state.getString("checkpoint"); if (beforeCheckpoint == null) { // 3.1 no stored checkpoint, so do the initial full backup - builder.setChildNode("root", current); + before = EmptyNodeState.EMPTY_NODE; } else { // 3.2 try to retrieve the previously backed up checkpoint - NodeState before = store.retrieve(beforeCheckpoint); + before = store.retrieve(beforeCheckpoint); if (before == null) { // the previous checkpoint is no longer available, // so use the backed up state as the basis of the // incremental backup diff before = state.getChildNode("root"); } - current.compareAgainstBaseState( - before, new ApplyDiff(builder.child("root"))); } + current.compareAgainstBaseState(before, + new BinaryAwareApplyDiff(builder.child("root"), writer)); builder.setProperty("checkpoint", checkpoint); // 4. 
commit the backup Index: oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/backup/FileStoreRestore.java =================================================================== --- oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/backup/FileStoreRestore.java (revision 1606853) +++ oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/backup/FileStoreRestore.java (working copy) @@ -23,10 +23,10 @@ import org.apache.jackrabbit.oak.api.CommitFailedException; import org.apache.jackrabbit.oak.plugins.segment.SegmentNodeState; +import org.apache.jackrabbit.oak.plugins.segment.SegmentWriter; import org.apache.jackrabbit.oak.plugins.segment.file.FileStore; import org.apache.jackrabbit.oak.spi.commit.CommitInfo; import org.apache.jackrabbit.oak.spi.commit.EmptyHook; -import org.apache.jackrabbit.oak.spi.state.ApplyDiff; import org.apache.jackrabbit.oak.spi.state.NodeBuilder; import org.apache.jackrabbit.oak.spi.state.NodeState; import org.apache.jackrabbit.oak.spi.state.NodeStore; @@ -52,20 +52,22 @@ // 2. 
init filestore FileStore restore = new FileStore(source, MAX_FILE_SIZE, false); + SegmentWriter writer = restore.getTracker().getWriter(); try { SegmentNodeState state = restore.getHead(); - restore(state.getChildNode("root"), store); + restore(state.getChildNode("root"), store, writer); } finally { restore.close(); } } - public static void restore(NodeState source, NodeStore store) - throws CommitFailedException { + private static void restore(NodeState source, NodeStore store, + SegmentWriter writer) throws CommitFailedException { long s = System.currentTimeMillis(); NodeState after = store.getRoot(); NodeBuilder builder = after.builder(); - source.compareAgainstBaseState(after, new ApplyDiff(builder)); + source.compareAgainstBaseState(after, new BinaryAwareApplyDiff(builder, + writer)); store.merge(builder, EmptyHook.INSTANCE, CommitInfo.EMPTY); log.debug("Restore finished in {} ms.", System.currentTimeMillis() - s); } Index: oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentBlob.java =================================================================== --- oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentBlob.java (revision 1606853) +++ oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentBlob.java (working copy) @@ -34,7 +34,7 @@ public class SegmentBlob extends Record implements Blob { - SegmentBlob(RecordId id) { + public SegmentBlob(RecordId id) { super(id); } Index: oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentWriter.java =================================================================== --- oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentWriter.java (revision 1606853) +++ oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentWriter.java (working copy) @@ -78,7 +78,7 @@ private static final Logger log = LoggerFactory.getLogger(SegmentWriter.class); - static final int BLOCK_SIZE = 1 << 12; // 4kB + public static final int 
BLOCK_SIZE = 1 << 12; // 4kB private static byte[] createNewBuffer() { byte[] buffer = new byte[Segment.MAX_SEGMENT_SIZE];