Index: oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tooling/ConsistencyChecker.java =================================================================== --- oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tooling/ConsistencyChecker.java (revision 1847468) +++ oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tooling/ConsistencyChecker.java (working copy) @@ -20,50 +20,34 @@ package org.apache.jackrabbit.oak.segment.file.tooling; import static java.text.DateFormat.getDateTimeInstance; -import static org.apache.jackrabbit.oak.api.Type.BINARIES; -import static org.apache.jackrabbit.oak.api.Type.BINARY; import static org.apache.jackrabbit.oak.commons.IOUtils.humanReadableByteCount; -import static org.apache.jackrabbit.oak.commons.PathUtils.concat; -import static org.apache.jackrabbit.oak.commons.PathUtils.denotesRoot; -import static org.apache.jackrabbit.oak.commons.PathUtils.getName; -import static org.apache.jackrabbit.oak.commons.PathUtils.getParentPath; import static org.apache.jackrabbit.oak.segment.file.FileStoreBuilder.fileStoreBuilder; -import static org.apache.jackrabbit.oak.spi.state.NodeStateUtils.getNode; import java.io.Closeable; import java.io.File; import java.io.IOException; -import java.io.InputStream; import java.io.PrintWriter; import java.text.MessageFormat; -import java.util.ArrayList; import java.util.Date; -import java.util.HashMap; -import java.util.LinkedHashSet; -import java.util.List; -import java.util.Map; +import java.util.Map.Entry; +import java.util.Objects; +import java.util.Optional; import java.util.Set; import java.util.concurrent.atomic.AtomicLong; -import java.util.stream.Collectors; +import com.google.common.base.Strings; import com.google.common.collect.Sets; -import org.apache.jackrabbit.oak.api.Blob; import org.apache.jackrabbit.oak.api.PropertyState; -import org.apache.jackrabbit.oak.api.Type; -import org.apache.jackrabbit.oak.segment.SegmentBlob; -import org.apache.jackrabbit.oak.segment.SegmentNodeStore; import org.apache.jackrabbit.oak.segment.SegmentNodeStoreBuilders; -import org.apache.jackrabbit.oak.segment.SegmentNotFoundException; import org.apache.jackrabbit.oak.segment.file.FileStore; import org.apache.jackrabbit.oak.segment.file.FileStoreBuilder; import org.apache.jackrabbit.oak.segment.file.InvalidFileStoreVersionException; -import org.apache.jackrabbit.oak.segment.file.JournalEntry; import org.apache.jackrabbit.oak.segment.file.JournalReader; import org.apache.jackrabbit.oak.segment.file.ReadOnlyFileStore; -import org.apache.jackrabbit.oak.segment.spi.monitor.IOMonitorAdapter; import org.apache.jackrabbit.oak.segment.file.tar.LocalJournalFile; -import org.apache.jackrabbit.oak.spi.state.ChildNodeEntry; -import org.apache.jackrabbit.oak.spi.state.NodeState; +import org.apache.jackrabbit.oak.segment.file.tooling.ConsistencyCheckerTemplate.ConsistencyCheckResult; +import org.apache.jackrabbit.oak.segment.file.tooling.ConsistencyCheckerTemplate.Revision; +import org.apache.jackrabbit.oak.segment.spi.monitor.IOMonitorAdapter; /** * Utility for checking the files of a @@ -72,10 +56,6 @@ */ public class ConsistencyChecker implements Closeable { - private static final String CHECKPOINT_INDENT = " "; - - private static final String NO_INDENT = ""; - private static class StatisticsIOMonitor extends IOMonitorAdapter { private final AtomicLong ioOperations = new AtomicLong(0); @@ -106,226 +86,22 @@ private int nodeCount; private int propertyCount; - - private int checkCount; - - /** - * Run a full traversal consistency check. - * - * @param directory directory containing the tar files - * @param journalFileName name of the journal file containing the revision history - * @param debugInterval number of seconds between printing progress information to - * the console during the full traversal phase. - * @param checkBinaries if {@code true} full content of binary properties will be scanned - * @param checkHead if {@code true} will check the head - * @param checkpoints collection of checkpoints to be checked - * @param filterPaths collection of repository paths to be checked - * @param ioStatistics if {@code true} prints I/O statistics gathered while consistency - * check was performed - * @param outWriter text output stream writer - * @param errWriter text error stream writer - * @throws IOException - * @throws InvalidFileStoreVersionException - */ - public static void checkConsistency( - File directory, - String journalFileName, - long debugInterval, - boolean checkBinaries, - boolean checkHead, - Set checkpoints, - Set filterPaths, - boolean ioStatistics, - PrintWriter outWriter, - PrintWriter errWriter - ) throws IOException, InvalidFileStoreVersionException { - try ( - JournalReader journal = new JournalReader(new LocalJournalFile(directory, journalFileName)); - ConsistencyChecker checker = new ConsistencyChecker(directory, debugInterval, ioStatistics, outWriter, errWriter) - ) { - Set checkpointsSet = Sets.newLinkedHashSet(); - List headPaths = new ArrayList<>(); - Map> checkpointPaths = new HashMap<>(); - - int revisionCount = 0; - - if (!checkpoints.isEmpty()) { - checkpointsSet.addAll(checkpoints); - - if (checkpointsSet.remove("all")) { - checkpointsSet = Sets - .newLinkedHashSet(SegmentNodeStoreBuilders.builder(checker.store).build().checkpoints()); - } - } - - for (String path : filterPaths) { - if (checkHead) { - headPaths.add(new PathToCheck(path, null)); - checker.checkCount++; - } - - for (String checkpoint : checkpointsSet) { - List pathList = checkpointPaths.get(checkpoint); - if (pathList == null) { - pathList = new ArrayList<>(); - checkpointPaths.put(checkpoint, pathList); - } - - pathList.add(new PathToCheck(path, checkpoint)); - checker.checkCount++; - } - } - - int initialCount = checker.checkCount; - JournalEntry lastValidJournalEntry = null; - - while (journal.hasNext() && checker.checkCount > 0) { - JournalEntry journalEntry = journal.next(); - String revision = journalEntry.getRevision(); - - try { - revisionCount++; - checker.store.setRevision(revision); - boolean overallValid = true; - - SegmentNodeStore sns = SegmentNodeStoreBuilders.builder(checker.store).build(); - - checker.print("\nChecking revision {0}", revision); - - if (checkHead) { - boolean mustCheck = headPaths.stream().anyMatch(p -> p.journalEntry == null); - - if (mustCheck) { - checker.print("\nChecking head\n"); - NodeState root = sns.getRoot(); - overallValid = overallValid && checker.checkPathsAtRoot(headPaths, root, journalEntry, checkBinaries); - } - } - - if (!checkpointsSet.isEmpty()) { - Map checkpointsToCheck = checkpointPaths.entrySet().stream().collect(Collectors.toMap( - Map.Entry::getKey, e -> e.getValue().stream().anyMatch(p -> p.journalEntry == null))); - boolean mustCheck = checkpointsToCheck.values().stream().anyMatch(v -> v == true); - - if (mustCheck) { - checker.print("\nChecking checkpoints"); - - for (String checkpoint : checkpointsSet) { - if (checkpointsToCheck.get(checkpoint)) { - checker.print("\nChecking checkpoint {0}", checkpoint); - - List pathList = checkpointPaths.get(checkpoint); - NodeState root = sns.retrieve(checkpoint); - - if (root == null) { - checker.printError("Checkpoint {0} not found in this revision!", checkpoint); - overallValid = false; - } else { - overallValid = overallValid && checker.checkPathsAtRoot(pathList, root, - journalEntry, checkBinaries); - } - } - } - } - } - - if (overallValid) { - lastValidJournalEntry = journalEntry; - } - } catch (IllegalArgumentException | SegmentNotFoundException e) { - checker.printError("Skipping invalid record id {0}: {1}", revision, e); - } - } - - checker.print("\nSearched through {0} revisions and {1} checkpoints", revisionCount, checkpointsSet.size()); - - if (initialCount == checker.checkCount) { - checker.print("No good revision found"); - } else { - if (checkHead) { - checker.print("\nHead"); - checker.printResults(headPaths, NO_INDENT); - } - - if (!checkpointsSet.isEmpty()) { - checker.print("\nCheckpoints"); - - for (String checkpoint : checkpointsSet) { - List pathList = checkpointPaths.get(checkpoint); - checker.print("- {0}", checkpoint); - checker.printResults(pathList, CHECKPOINT_INDENT); - } - } - - checker.print("\nOverall"); - checker.printOverallResults(lastValidJournalEntry); - } - - if (ioStatistics) { - checker.print( - "[I/O] Segment read: Number of operations: {0}", - checker.statisticsIOMonitor.ioOperations - ); - checker.print( - "[I/O] Segment read: Total size: {0} ({1} bytes)", - humanReadableByteCount(checker.statisticsIOMonitor.readBytes.get()), - checker.statisticsIOMonitor.readBytes - ); - checker.print( - "[I/O] Segment read: Total time: {0} ns", - checker.statisticsIOMonitor.readTime - ); - } - } - } - - private void printResults(List pathList, String indent) { - for (PathToCheck ptc : pathList) { - String revision = ptc.journalEntry != null ? ptc.journalEntry.getRevision() : null; - long timestamp = ptc.journalEntry != null ? ptc.journalEntry.getTimestamp() : -1L; - - print("{0}Latest good revision for path {1} is {2} from {3}", indent, ptc.path, - toString(revision), toString(timestamp)); - } - } - - private void printOverallResults(JournalEntry journalEntry) { - String revision = journalEntry != null ? journalEntry.getRevision() : null; - long timestamp = journalEntry != null ? journalEntry.getTimestamp() : -1L; - - print("Latest good revision for paths and checkpoints checked is {0} from {1}", toString(revision), toString(timestamp)); - } - private static String toString(String revision) { - if (revision != null) { - return revision; - } else { - return "none"; - } - } - - private static String toString(long timestamp) { - if (timestamp != -1L) { - return getDateTimeInstance().format(new Date(timestamp)); - } else { - return "unknown date"; - } - } - /** * Create a new consistency checker instance * - * @param directory directory containing the tar files - * @param debugInterval number of seconds between printing progress information to - * the console during the full traversal phase. - * @param ioStatistics if {@code true} prints I/O statistics gathered while consistency - * check was performed - * @param outWriter text output stream writer - * @param errWriter text error stream writer + * @param directory directory containing the tar files + * @param debugInterval number of seconds between printing progress + * information to the console during the full traversal + * phase. + * @param ioStatistics if {@code true} prints I/O statistics gathered while + * consistency check was performed + * @param outWriter text output stream writer + * @param errWriter text error stream writer * @throws IOException */ public ConsistencyChecker(File directory, long debugInterval, boolean ioStatistics, PrintWriter outWriter, - PrintWriter errWriter) throws IOException, InvalidFileStoreVersionException { + PrintWriter errWriter) throws IOException, InvalidFileStoreVersionException { FileStoreBuilder builder = fileStoreBuilder(directory); if (ioStatistics) { builder.withIOMonitor(statisticsIOMonitor); @@ -337,235 +113,205 @@ } /** - * Checks for consistency a list of paths, relative to the same root. - * - * @param paths paths to check - * @param root root relative to which the paths are retrieved - * @param journalEntry entry containing the current revision checked - * @param checkBinaries if {@code true} full content of binary properties will be scanned - * @return {@code true}, if the whole list of paths is consistent + * Run a full traversal consistency check. + * + * @param directory directory containing the tar files + * @param journalFileName name of the journal file containing the revision + * history + * @param checkBinaries if {@code true} full content of binary properties + * will be scanned + * @param checkHead if {@code true} will check the head + * @param checkpointsToCheck collection of checkpoints to be checked + * @param filterPaths collection of repository paths to be checked + * @param ioStatistics if {@code true} prints I/O statistics gathered + * while consistency check was performed */ - private boolean checkPathsAtRoot(List paths, NodeState root, JournalEntry journalEntry, - boolean checkBinaries) { - boolean result = true; - - for (PathToCheck ptc : paths) { - if (ptc.journalEntry == null) { - String corruptPath = checkPathAtRoot(ptc, root, checkBinaries); - - if (corruptPath == null) { - print("Path {0} is consistent", ptc.path); - ptc.journalEntry = journalEntry; - checkCount--; - } else { - result = false; - ptc.corruptPaths.add(corruptPath); + public void checkConsistency( + File directory, + String journalFileName, + boolean checkBinaries, + boolean checkHead, + Set checkpointsToCheck, + Set filterPaths, + boolean ioStatistics + ) throws IOException { + try (JournalReader journal = new JournalReader(new LocalJournalFile(directory, journalFileName))) { + ConsistencyCheckerTemplate template = new ConsistencyCheckerTemplate() { + + @Override + void onCheckRevision(String revision) { + print("\nChecking revision {0}", revision); } - } - } - - return result; - } - - /** - * Checks the consistency of the supplied {@code ptc} relative to the given {@code root}. - * - * @param ptc path to check, provided there are no corrupt paths. - * @param root root relative to which the path is retrieved - * @param checkBinaries if {@code true} full content of binary properties will be scanned - * @return {@code null}, if the content tree rooted at path (possibly under a checkpoint) - * is consistent in this revision or the path of the first inconsistency otherwise. - */ - private String checkPathAtRoot(PathToCheck ptc, NodeState root, boolean checkBinaries) { - String result = null; - for (String corruptPath : ptc.corruptPaths) { - try { - NodeWrapper wrapper = NodeWrapper.deriveTraversableNodeOnPath(root, corruptPath); - result = checkNode(wrapper.node, wrapper.path, checkBinaries); + @Override + void onCheckHead() { + print("\nChecking head\n"); + } - if (result != null) { - return result; + @Override + void onCheckChekpoints() { + print("\nChecking checkpoints"); } - } catch (IllegalArgumentException e) { - debug("Path {0} not found", corruptPath); - } - } - nodeCount = 0; - propertyCount = 0; + @Override + void onCheckCheckpoint(String checkpoint) { + print("\nChecking checkpoint {0}", checkpoint); + } - print("Checking {0}", ptc.path); - - try { - NodeWrapper wrapper = NodeWrapper.deriveTraversableNodeOnPath(root, ptc.path); - result = checkNodeAndDescendants(wrapper.node, wrapper.path, checkBinaries); - print("Checked {0} nodes and {1} properties", nodeCount, propertyCount); - - return result; - } catch (IllegalArgumentException e) { - printError("Path {0} not found", ptc.path); - return ptc.path; - } - } - - /** - * Checks the consistency of a node and its properties at the given path. - * - * @param node node to be checked - * @param path path of the node - * @param checkBinaries if {@code true} full content of binary properties will be scanned - * @return {@code null}, if the node is consistent, - * or the path of the first inconsistency otherwise. - */ - private String checkNode(NodeState node, String path, boolean checkBinaries) { - try { - debug("Traversing {0}", path); - nodeCount++; - for (PropertyState propertyState : node.getProperties()) { - Type type = propertyState.getType(); - boolean checked = false; - - if (type == BINARY) { - checked = traverse(propertyState.getValue(BINARY), checkBinaries); - } else if (type == BINARIES) { - for (Blob blob : propertyState.getValue(BINARIES)) { - checked = checked | traverse(blob, checkBinaries); - } - } else { - propertyState.getValue(type); + @Override + void onCheckpointNotFoundInRevision(String checkpoint) { + printError("Checkpoint {0} not found in this revision!", checkpoint); + } + + @Override + void onCheckRevisionError(String revision, Exception e) { + printError("Skipping invalid record id {0}: {1}", revision, e); + } + + @Override + void onConsistentPath(String path) { + print("Path {0} is consistent", path); + } + + @Override + void onPathNotFound(String path) { + printError("Path {0} not found", path); + } + + @Override + void onCheckTree(String path) { + nodeCount = 0; + propertyCount = 0; + print("Checking {0}", path); + } + + @Override + void onCheckTreeEnd() { + print("Checked {0} nodes and {1} properties", nodeCount, propertyCount); + } + + @Override + void onCheckNode(String path) { + debug("Traversing {0}", path); + nodeCount++; + } + + @Override + void onCheckProperty() { propertyCount++; - checked = true; } - - if (checked) { - debug("Checked {0}/{1}", path, propertyState); + + @Override + void onCheckPropertyEnd(String path, PropertyState property) { + debug("Checked {0}/{1}", path, property); } - } - - return null; - } catch (RuntimeException | IOException e) { - printError("Error while traversing {0}: {1}", path, e); - return path; - } - } - - /** - * Recursively checks the consistency of a node and its descendants at the given path. - * @param node node to be checked - * @param path path of the node - * @param checkBinaries if {@code true} full content of binary properties will be scanned - * @return {@code null}, if the node is consistent, - * or the path of the first inconsistency otherwise. - */ - private String checkNodeAndDescendants(NodeState node, String path, boolean checkBinaries) { - String result = checkNode(node, path, checkBinaries); - if (result != null) { - return result; - } - - try { - for (ChildNodeEntry cne : node.getChildNodeEntries()) { - String childName = cne.getName(); - NodeState child = cne.getNodeState(); - result = checkNodeAndDescendants(child, concat(path, childName), checkBinaries); - if (result != null) { - return result; + + @Override + void onCheckNodeError(String path, Exception e) { + printError("Error while traversing {0}: {1}", path, e); } - } - return null; - } catch (RuntimeException e) { - printError("Error while traversing {0}: {1}", path, e.getMessage()); - return path; - } - } - - static class NodeWrapper { - final NodeState node; - final String path; - - NodeWrapper(NodeState node, String path) { - this.node = node; - this.path = path; - } - - static NodeWrapper deriveTraversableNodeOnPath(NodeState root, String path) { - String parentPath = getParentPath(path); - String name = getName(path); - NodeState parent = getNode(root, parentPath); - - if (!denotesRoot(path)) { - if (!parent.hasChildNode(name)) { - throw new IllegalArgumentException("Invalid path: " + path); + @Override + void onCheckTreeError(String path, Exception e) { + printError("Error while traversing {0}: {1}", path, e.getMessage()); } - - return new NodeWrapper(parent.getChildNode(name), path); - } else { - return new NodeWrapper(parent, parentPath); + + }; + + Set checkpoints = checkpointsToCheck; + if (checkpointsToCheck.contains("all")) { + checkpoints = Sets.newLinkedHashSet(SegmentNodeStoreBuilders.builder(store).build().checkpoints()); } - } - } - - static class PathToCheck { - final String path; - final String checkpoint; - - JournalEntry journalEntry; - Set corruptPaths = new LinkedHashSet<>(); - - PathToCheck(String path, String checkpoint) { - this.path = path; - this.checkpoint = checkpoint; - } - - @Override - public int hashCode() { - final int prime = 31; - int result = 1; - result = prime * result + ((checkpoint == null) ? 0 : checkpoint.hashCode()); - result = prime * result + ((path == null) ? 0 : path.hashCode()); - return result; - } - @Override - public boolean equals(Object object) { - if (this == object) { - return true; - } else if (object instanceof PathToCheck) { - PathToCheck that = (PathToCheck) object; - return path.equals(that.path) && checkpoint.equals(that.checkpoint); + ConsistencyCheckResult result = template.checkConsistency( + store, + journal, + checkHead, + checkpoints, + filterPaths, + checkBinaries + ); + + print("\nSearched through {0} revisions and {1} checkpoints", result.getCheckedRevisionsCount(), checkpoints.size()); + + if (hasAnyRevision(result)) { + if (checkHead) { + print("\nHead"); + for (Entry e : result.getHeadRevisions().entrySet()) { + printRevision(0, e.getKey(), e.getValue()); + } + } + if (checkpoints.size() > 0) { + print("\nCheckpoints"); + for (String checkpoint : result.getCheckpointRevisions().keySet()) { + print("- {0}", checkpoint); + for (Entry e : result.getCheckpointRevisions().get(checkpoint).entrySet()) { + printRevision(2, e.getKey(), e.getValue()); + } + + } + } + print("\nOverall"); + printOverallRevision(result.getOverallRevision()); } else { - return false; + print("No good revision found"); } - } - } - private boolean traverse(Blob blob, boolean checkBinaries) throws IOException { - if (checkBinaries && !isExternal(blob)) { - InputStream s = blob.getNewStream(); - try { - byte[] buffer = new byte[8192]; - int l = s.read(buffer, 0, buffer.length); - while (l >= 0) { - l = s.read(buffer, 0, buffer.length); - } - } finally { - s.close(); + if (ioStatistics) { + print( + "[I/O] Segment read: Number of operations: {0}", + statisticsIOMonitor.ioOperations.get() + ); + print( + "[I/O] Segment read: Total size: {0} ({1} bytes)", + humanReadableByteCount(statisticsIOMonitor.readBytes.get()), + statisticsIOMonitor.readBytes.get() + ); + print( + "[I/O] Segment read: Total time: {0} ns", + statisticsIOMonitor.readTime.get() + ); } - - propertyCount++; - return true; } - - return false; } - private static boolean isExternal(Blob b) { - if (b instanceof SegmentBlob) { - return ((SegmentBlob) b).isExternal(); - } - return false; + private void printRevision(int indent, String path, Revision revision) { + Optional r = Optional.ofNullable(revision); + print( + "{0}Latest good revision for path {1} is {2} from {3}", + Strings.repeat(" ", indent), + path, + r.map(Revision::getRevision).orElse("none"), + r.map(Revision::getTimestamp).map(ConsistencyChecker::timestampToString).orElse("unknown time") + ); + } + + private void printOverallRevision(Revision revision) { + Optional r = Optional.ofNullable(revision); + print( + "Latest good revision for paths and checkpoints checked is {0} from {1}", + r.map(Revision::getRevision).orElse("none"), + r.map(Revision::getTimestamp).map(ConsistencyChecker::timestampToString).orElse("unknown time") + ); + } + + private static boolean hasAnyRevision(ConsistencyCheckResult result) { + return hasAnyHeadRevision(result) || hasAnyCheckpointRevision(result); + } + + private static boolean hasAnyHeadRevision(ConsistencyCheckResult result) { + return result.getHeadRevisions() + .values() + .stream() + .anyMatch(Objects::nonNull); + } + + private static boolean hasAnyCheckpointRevision(ConsistencyCheckResult result) { + return result.getCheckpointRevisions() + .values() + .stream() + .flatMap(m -> m.values().stream()) + .anyMatch(Objects::nonNull); } @Override @@ -573,44 +319,22 @@ store.close(); } - private void print(String format) { - outWriter.println(format); - } - - private void print(String format, Object arg) { - outWriter.println(MessageFormat.format(format, arg)); - } - - private void print(String format, Object arg1, Object arg2) { - outWriter.println(MessageFormat.format(format, arg1, arg2)); - } - - private void print(String format, Object arg1, Object arg2, Object arg3, Object arg4) { - outWriter.println(MessageFormat.format(format, arg1, arg2, arg3, arg4)); - } - - private void printError(String format, Object arg) { - errWriter.println(MessageFormat.format(format, arg)); + private void print(String format, Object... arguments) { + outWriter.println(MessageFormat.format(format, arguments)); } - private void printError(String format, Object arg1, Object arg2) { - errWriter.println(MessageFormat.format(format, arg1, arg2)); + private void printError(String format, Object... args) { + errWriter.println(MessageFormat.format(format, args)); } private long ts; - private void debug(String format, Object arg) { + private void debug(String format, Object... arg) { if (debug()) { print(format, arg); } } - private void debug(String format, Object arg1, Object arg2) { - if (debug()) { - print(format, arg1, arg2); - } - } - private boolean debug() { // Avoid calling System.currentTimeMillis(), which is slow on some systems. if (debugInterval == Long.MAX_VALUE) { @@ -627,4 +351,10 @@ return false; } } + + private static String timestampToString(long timestamp) { + return getDateTimeInstance().format(new Date(timestamp)); + } + + } Index: oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tooling/ConsistencyCheckerTemplate.java =================================================================== --- oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tooling/ConsistencyCheckerTemplate.java (nonexistent) +++ oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tooling/ConsistencyCheckerTemplate.java (working copy) @@ -0,0 +1,545 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.jackrabbit.oak.segment.file.tooling; + +import static org.apache.jackrabbit.oak.api.Type.BINARIES; +import static org.apache.jackrabbit.oak.api.Type.BINARY; +import static org.apache.jackrabbit.oak.commons.PathUtils.concat; + +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Optional; +import java.util.Set; +import java.util.stream.Collectors; + +import org.apache.jackrabbit.oak.api.Blob; +import org.apache.jackrabbit.oak.api.PropertyState; +import org.apache.jackrabbit.oak.api.Type; +import org.apache.jackrabbit.oak.segment.SegmentBlob; +import org.apache.jackrabbit.oak.segment.SegmentNodeStore; +import org.apache.jackrabbit.oak.segment.SegmentNodeStoreBuilders; +import org.apache.jackrabbit.oak.segment.SegmentNotFoundException; +import org.apache.jackrabbit.oak.segment.file.JournalEntry; +import org.apache.jackrabbit.oak.segment.file.ReadOnlyFileStore; +import org.apache.jackrabbit.oak.spi.state.ChildNodeEntry; +import org.apache.jackrabbit.oak.spi.state.NodeState; +import org.apache.jackrabbit.oak.spi.state.NodeStateUtils; + +abstract class ConsistencyCheckerTemplate { + + private static NodeState getDescendantOrNull(NodeState root, String path) { + NodeState descendant = NodeStateUtils.getNode(root, path); + if (descendant.exists()) { + return descendant; + } + return null; + } + + private static class PathToCheck { + + final String path; + + final String checkpoint; + + JournalEntry journalEntry; + + Set corruptPaths = new LinkedHashSet<>(); + + PathToCheck(String path, String checkpoint) { + this.path = path; + this.checkpoint = checkpoint; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((checkpoint == null) ? 0 : checkpoint.hashCode()); + result = prime * result + ((path == null) ? 0 : path.hashCode()); + return result; + } + + @Override + public boolean equals(Object object) { + if (this == object) { + return true; + } else if (object instanceof PathToCheck) { + PathToCheck that = (PathToCheck) object; + return path.equals(that.path) && checkpoint.equals(that.checkpoint); + } else { + return false; + } + } + } + + static class FoundRevision { + + private final String path; + + private final String revision; + + private final Long timestamp; + + private FoundRevision(String path, String revision, Long timestamp) { + this.path = path; + this.revision = revision; + this.timestamp = timestamp; + } + + public String getPath() { + return path; + } + + public Optional getRevision() { + return Optional.ofNullable(revision); + } + + public Optional getTimestamp() { + return Optional.ofNullable(timestamp); + } + } + + void onCheckRevision(String revision) { + // Do nothing. + } + + void onCheckHead() { + // Do nothing. + } + + void onCheckChekpoints() { + // Do nothing. + } + + void onCheckCheckpoint(String checkpoint) { + // Do nothing. + } + + void onCheckpointNotFoundInRevision(String checkpoint) { + // Do nothing. + } + + void onCheckRevisionError(String revision, Exception e) { + // Do nothing. + } + + void onConsistentPath(String path) { + // Do nothing. + } + + void onPathNotFound(String path) { + // Do nothing. + } + + void onCheckTree(String path) { + // Do nothing. + } + + void onCheckTreeEnd() { + // Do nothing. + } + + void onCheckNode(String path) { + // Do nothing. + } + + void onCheckProperty() { + // Do nothing. + } + + void onCheckPropertyEnd(String path, PropertyState property) { + // Do nothing. + } + + void onCheckNodeError(String path, Exception e) { + // Do nothing. + } + + void onCheckTreeError(String path, Exception e) { + // Do nothing. + } + + public static class Revision { + + private final String revision; + + private final long timestamp; + + private Revision(String revision, long timestamp) { + this.revision = revision; + this.timestamp = timestamp; + } + + public String getRevision() { + return revision; + } + + public long getTimestamp() { + return timestamp; + } + + } + + public static class ConsistencyCheckResult { + + private ConsistencyCheckResult() { + // Prevent external instantiation. + } + + private final Map headRevisions = new HashMap<>(); + + private final Map> checkpointRevisions = new HashMap<>(); + + private Revision overallRevision; + + private int checkedRevisionsCount; + + public int getCheckedRevisionsCount() { + return checkedRevisionsCount; + } + + public Revision getOverallRevision() { + return overallRevision; + } + + public Map getHeadRevisions() { + return headRevisions; + } + + public Map> getCheckpointRevisions() { + return checkpointRevisions; + } + + } + + public final ConsistencyCheckResult checkConsistency( + ReadOnlyFileStore store, + Iterator journal, + boolean head, + Set checkpoints, + Set paths, + boolean binaries + ) { + List headPaths = new ArrayList<>(); + Map> checkpointPaths = new HashMap<>(); + + int revisionCount = 0; + + int checkCount = 0; + for (String path : paths) { + if (head) { + headPaths.add(new PathToCheck(path, null)); + checkCount++; + } + + for (String checkpoint : checkpoints) { + List pathList = checkpointPaths.get(checkpoint); + if (pathList == null) { + pathList = new ArrayList<>(); + checkpointPaths.put(checkpoint, pathList); + } + + pathList.add(new PathToCheck(path, checkpoint)); + checkCount++; + } + } + + JournalEntry lastValidJournalEntry = null; + + while (journal.hasNext() && checkCount > 0) { + JournalEntry journalEntry = journal.next(); + String revision = journalEntry.getRevision(); + + try { + revisionCount++; + store.setRevision(revision); + boolean overallValid = true; + + SegmentNodeStore sns = SegmentNodeStoreBuilders.builder(store).build(); + + onCheckRevision(revision); + + if (head) { + boolean mustCheck = headPaths.stream().anyMatch(p -> p.journalEntry == null); + + if (mustCheck) { + onCheckHead(); + NodeState root = sns.getRoot(); + boolean result = true; + for (PathToCheck ptc : headPaths) { + if (ptc.journalEntry == null) { + String corruptPath = checkPathAtRoot(ptc, root, binaries); + + if (corruptPath == null) { + onConsistentPath(ptc.path); + ptc.journalEntry = journalEntry; + checkCount--; + } else { + result = false; + ptc.corruptPaths.add(corruptPath); + } + } + } + overallValid = overallValid && result; + } + } + + if (!checkpoints.isEmpty()) { + Map checkpointsToCheck = checkpointPaths.entrySet().stream().collect(Collectors.toMap( + Map.Entry::getKey, e -> e.getValue().stream().anyMatch(p -> p.journalEntry == null))); + boolean mustCheck = checkpointsToCheck.values().stream().anyMatch(v -> v); + + if (mustCheck) { + onCheckChekpoints(); + + for (String checkpoint : checkpoints) { + if (checkpointsToCheck.get(checkpoint)) { + onCheckCheckpoint(checkpoint); + + List pathList = checkpointPaths.get(checkpoint); + NodeState root = sns.retrieve(checkpoint); + + if (root == null) { + onCheckpointNotFoundInRevision(checkpoint); + overallValid = false; + } else { + boolean result = true; + for (PathToCheck ptc : pathList) { + if (ptc.journalEntry == null) { + String corruptPath = checkPathAtRoot(ptc, root, binaries); + + if (corruptPath == null) { + onConsistentPath(ptc.path); + ptc.journalEntry = journalEntry; + checkCount--; + } else { + result = false; + ptc.corruptPaths.add(corruptPath); + } + } + } + overallValid = overallValid && result; + } + } + } + } + } + + if (overallValid) { + lastValidJournalEntry = journalEntry; + } + } catch (IllegalArgumentException | SegmentNotFoundException e) { + onCheckRevisionError(revision, e); + } + } + + ConsistencyCheckResult result = new ConsistencyCheckResult(); + + result.checkedRevisionsCount = revisionCount; + result.overallRevision = newRevisionOrNull(lastValidJournalEntry); + + for (PathToCheck path : headPaths) { + result.headRevisions.put(path.path, newRevisionOrNull(path.journalEntry)); + } + + for (String checkpoint : checkpoints) { + for (PathToCheck path : checkpointPaths.get(checkpoint)) { + result.checkpointRevisions + .computeIfAbsent(checkpoint, s -> new HashMap<>()) + .put(path.path, newRevisionOrNull(path.journalEntry)); + } + } + + return result; + } + + private static Revision newRevisionOrNull(JournalEntry entry) { + if (entry == null) { + return null; + } + return new Revision(entry.getRevision(), entry.getTimestamp()); + } + + private List toFoundRevisionsList(List pathsToCheck) { + List foundRevisions = new ArrayList<>(pathsToCheck.size()); + for (PathToCheck pathToCheck : pathsToCheck) { + if (pathToCheck.journalEntry == null) { + foundRevisions.add(new FoundRevision(pathToCheck.path, null, null)); + } else { + foundRevisions.add(new FoundRevision(pathToCheck.path, pathToCheck.journalEntry.getRevision(), pathToCheck.journalEntry.getTimestamp())); + } + } + return foundRevisions; + } + + private Map> toFoundRevisionsMap(Map> pathsToCheck) { + Map> foundRevisions = new HashMap<>(); + for (Entry> entry : pathsToCheck.entrySet()) { + foundRevisions.put(entry.getKey(), toFoundRevisionsList(entry.getValue())); + } + return foundRevisions; + } + + /** + * Checks the consistency of the supplied {@code ptc} relative to the given + * {@code root}. + * + * @param ptc path to check, provided there are no corrupt paths. + * @param root root relative to which the path is retrieved + * @param checkBinaries if {@code true} full content of binary properties + * will be scanned + * @return {@code null}, if the content tree rooted at path (possibly under + * a checkpoint) is consistent in this revision or the path of the first + * inconsistency otherwise. + */ + private String checkPathAtRoot(PathToCheck ptc, NodeState root, boolean checkBinaries) { + String result; + + for (String corruptPath : ptc.corruptPaths) { + NodeState node = getDescendantOrNull(root, corruptPath); + if (node == null) { + onPathNotFound(corruptPath); + continue; + } + result = checkNode(node, corruptPath, checkBinaries); + if (result != null) { + return result; + } + } + + onCheckTree(ptc.path); + + NodeState node = getDescendantOrNull(root, ptc.path); + if (node == null) { + onPathNotFound(ptc.path); + return ptc.path; + } + result = checkNodeAndDescendants(node, ptc.path, checkBinaries); + onCheckTreeEnd(); + return result; + } + + /** + * Checks the consistency of a node and its properties at the given path. + * + * @param node node to be checked + * @param path path of the node + * @param checkBinaries if {@code true} full content of binary properties + * will be scanned + * @return {@code null}, if the node is consistent, or the path of the first + * inconsistency otherwise. + */ + private String checkNode(NodeState node, String path, boolean checkBinaries) { + try { + onCheckNode(path); + for (PropertyState propertyState : node.getProperties()) { + Type type = propertyState.getType(); + boolean checked = false; + + if (type == BINARY) { + checked = traverse(propertyState.getValue(BINARY), checkBinaries); + } else if (type == BINARIES) { + for (Blob blob : propertyState.getValue(BINARIES)) { + checked = checked | traverse(blob, checkBinaries); + } + } else { + propertyState.getValue(type); + onCheckProperty(); + checked = true; + } + + if (checked) { + onCheckPropertyEnd(path, propertyState); + } + } + + return null; + } catch (RuntimeException | IOException e) { + onCheckNodeError(path, e); + return path; + } + } + + /** + * Recursively checks the consistency of a node and its descendants at the + * given path. + * + * @param node node to be checked + * @param path path of the node + * @param checkBinaries if {@code true} full content of binary properties + * will be scanned + * @return {@code null}, if the node is consistent, or the path of the first + * inconsistency otherwise. + */ + private String checkNodeAndDescendants(NodeState node, String path, boolean checkBinaries) { + String result = checkNode(node, path, checkBinaries); + if (result != null) { + return result; + } + + try { + for (ChildNodeEntry cne : node.getChildNodeEntries()) { + String childName = cne.getName(); + NodeState child = cne.getNodeState(); + result = checkNodeAndDescendants(child, concat(path, childName), checkBinaries); + if (result != null) { + return result; + } + } + + return null; + } catch (RuntimeException e) { + onCheckTreeError(path, e); + return path; + } + } + + private boolean traverse(Blob blob, boolean checkBinaries) throws IOException { + if (checkBinaries && !isExternal(blob)) { + try (InputStream s = blob.getNewStream()) { + byte[] buffer = new byte[8192]; + int l = s.read(buffer, 0, buffer.length); + while (l >= 0) { + l = s.read(buffer, 0, buffer.length); + } + } + onCheckProperty(); + return true; + } + + return false; + } + + private static boolean isExternal(Blob b) { + if (b instanceof SegmentBlob) { + return ((SegmentBlob) b).isExternal(); + } + return false; + } + +} + Property changes on: oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tooling/ConsistencyCheckerTemplate.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/tool/Check.java =================================================================== --- oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/tool/Check.java (revision 1847468) +++ oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/tool/Check.java (working copy) @@ -239,18 +239,15 @@ } public int run() { - try { - ConsistencyChecker.checkConsistency( + try (ConsistencyChecker checker = new ConsistencyChecker(path, debugInterval, ioStatistics, outWriter, errWriter)) { + checker.checkConsistency( path, journal, - debugInterval, checkBinaries, checkHead, checkpoints, filterPaths, - ioStatistics, - outWriter, - errWriter + ioStatistics ); return 0; } catch (Exception e) { Index: oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/file/tooling/CheckInvalidRepositoryTest.java =================================================================== --- oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/file/tooling/CheckInvalidRepositoryTest.java (revision 1847468) +++ oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/file/tooling/CheckInvalidRepositoryTest.java (working copy) @@ -19,7 +19,6 @@ package org.apache.jackrabbit.oak.segment.file.tooling; import java.io.File; -import java.io.IOException; import java.io.PrintWriter; import java.io.StringWriter; import java.util.HashSet; @@ -37,7 +36,7 @@ import org.junit.Test; /** - * Tests for {@link CheckCommand} assuming an invalid repository. + * Tests for {@link ConsistencyChecker} assuming an invalid repository. */ public class CheckInvalidRepositoryTest extends CheckRepositoryTestBase { @@ -82,7 +81,7 @@ } @Test - public void testPartialBrokenPathWithoutValidRevision() { + public void testPartialBrokenPathWithoutValidRevision() throws Exception { StringWriter strOut = new StringWriter(); StringWriter strErr = new StringWriter(); @@ -218,7 +217,9 @@ } @Test - public void testLargeJournal() throws IOException { + public void testLargeJournal() throws Exception { + corruptPathFromCheckpoint(); + StringWriter strOut = new StringWriter(); StringWriter strErr = new StringWriter();