Index: oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tooling/ConsistencyChecker.java =================================================================== --- oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tooling/ConsistencyChecker.java (revision 1847187) +++ oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tooling/ConsistencyChecker.java (working copy) @@ -20,50 +20,29 @@ package org.apache.jackrabbit.oak.segment.file.tooling; import static java.text.DateFormat.getDateTimeInstance; -import static org.apache.jackrabbit.oak.api.Type.BINARIES; -import static org.apache.jackrabbit.oak.api.Type.BINARY; import static org.apache.jackrabbit.oak.commons.IOUtils.humanReadableByteCount; -import static org.apache.jackrabbit.oak.commons.PathUtils.concat; -import static org.apache.jackrabbit.oak.commons.PathUtils.denotesRoot; -import static org.apache.jackrabbit.oak.commons.PathUtils.getName; -import static org.apache.jackrabbit.oak.commons.PathUtils.getParentPath; import static org.apache.jackrabbit.oak.segment.file.FileStoreBuilder.fileStoreBuilder; -import static org.apache.jackrabbit.oak.spi.state.NodeStateUtils.getNode; import java.io.Closeable; import java.io.File; import java.io.IOException; -import java.io.InputStream; import java.io.PrintWriter; import java.text.MessageFormat; -import java.util.ArrayList; import java.util.Date; -import java.util.HashMap; -import java.util.LinkedHashSet; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.Set; import java.util.concurrent.atomic.AtomicLong; -import java.util.stream.Collectors; -import com.google.common.collect.Sets; -import org.apache.jackrabbit.oak.api.Blob; import org.apache.jackrabbit.oak.api.PropertyState; -import org.apache.jackrabbit.oak.api.Type; -import org.apache.jackrabbit.oak.segment.SegmentBlob; -import org.apache.jackrabbit.oak.segment.SegmentNodeStore; -import 
org.apache.jackrabbit.oak.segment.SegmentNodeStoreBuilders; -import org.apache.jackrabbit.oak.segment.SegmentNotFoundException; import org.apache.jackrabbit.oak.segment.file.FileStore; import org.apache.jackrabbit.oak.segment.file.FileStoreBuilder; import org.apache.jackrabbit.oak.segment.file.InvalidFileStoreVersionException; -import org.apache.jackrabbit.oak.segment.file.JournalEntry; import org.apache.jackrabbit.oak.segment.file.JournalReader; import org.apache.jackrabbit.oak.segment.file.ReadOnlyFileStore; -import org.apache.jackrabbit.oak.segment.spi.monitor.IOMonitorAdapter; import org.apache.jackrabbit.oak.segment.file.tar.LocalJournalFile; -import org.apache.jackrabbit.oak.spi.state.ChildNodeEntry; -import org.apache.jackrabbit.oak.spi.state.NodeState; +import org.apache.jackrabbit.oak.segment.spi.monitor.IOMonitorAdapter; /** * Utility for checking the files of a @@ -72,10 +51,6 @@ */ public class ConsistencyChecker implements Closeable { - private static final String CHECKPOINT_INDENT = " "; - - private static final String NO_INDENT = ""; - private static class StatisticsIOMonitor extends IOMonitorAdapter { private final AtomicLong ioOperations = new AtomicLong(0); @@ -106,226 +81,22 @@ private int nodeCount; private int propertyCount; - - private int checkCount; - - /** - * Run a full traversal consistency check. - * - * @param directory directory containing the tar files - * @param journalFileName name of the journal file containing the revision history - * @param debugInterval number of seconds between printing progress information to - * the console during the full traversal phase. 
- * @param checkBinaries if {@code true} full content of binary properties will be scanned - * @param checkHead if {@code true} will check the head - * @param checkpoints collection of checkpoints to be checked - * @param filterPaths collection of repository paths to be checked - * @param ioStatistics if {@code true} prints I/O statistics gathered while consistency - * check was performed - * @param outWriter text output stream writer - * @param errWriter text error stream writer - * @throws IOException - * @throws InvalidFileStoreVersionException - */ - public static void checkConsistency( - File directory, - String journalFileName, - long debugInterval, - boolean checkBinaries, - boolean checkHead, - Set checkpoints, - Set filterPaths, - boolean ioStatistics, - PrintWriter outWriter, - PrintWriter errWriter - ) throws IOException, InvalidFileStoreVersionException { - try ( - JournalReader journal = new JournalReader(new LocalJournalFile(directory, journalFileName)); - ConsistencyChecker checker = new ConsistencyChecker(directory, debugInterval, ioStatistics, outWriter, errWriter) - ) { - Set checkpointsSet = Sets.newLinkedHashSet(); - List headPaths = new ArrayList<>(); - Map> checkpointPaths = new HashMap<>(); - - int revisionCount = 0; - - if (!checkpoints.isEmpty()) { - checkpointsSet.addAll(checkpoints); - - if (checkpointsSet.remove("all")) { - checkpointsSet = Sets - .newLinkedHashSet(SegmentNodeStoreBuilders.builder(checker.store).build().checkpoints()); - } - } - - for (String path : filterPaths) { - if (checkHead) { - headPaths.add(new PathToCheck(path, null)); - checker.checkCount++; - } - - for (String checkpoint : checkpointsSet) { - List pathList = checkpointPaths.get(checkpoint); - if (pathList == null) { - pathList = new ArrayList<>(); - checkpointPaths.put(checkpoint, pathList); - } - - pathList.add(new PathToCheck(path, checkpoint)); - checker.checkCount++; - } - } - - int initialCount = checker.checkCount; - JournalEntry lastValidJournalEntry = 
null; - - while (journal.hasNext() && checker.checkCount > 0) { - JournalEntry journalEntry = journal.next(); - String revision = journalEntry.getRevision(); - - try { - revisionCount++; - checker.store.setRevision(revision); - boolean overallValid = true; - - SegmentNodeStore sns = SegmentNodeStoreBuilders.builder(checker.store).build(); - - checker.print("\nChecking revision {0}", revision); - - if (checkHead) { - boolean mustCheck = headPaths.stream().anyMatch(p -> p.journalEntry == null); - - if (mustCheck) { - checker.print("\nChecking head\n"); - NodeState root = sns.getRoot(); - overallValid = overallValid && checker.checkPathsAtRoot(headPaths, root, journalEntry, checkBinaries); - } - } - - if (!checkpointsSet.isEmpty()) { - Map checkpointsToCheck = checkpointPaths.entrySet().stream().collect(Collectors.toMap( - Map.Entry::getKey, e -> e.getValue().stream().anyMatch(p -> p.journalEntry == null))); - boolean mustCheck = checkpointsToCheck.values().stream().anyMatch(v -> v == true); - - if (mustCheck) { - checker.print("\nChecking checkpoints"); - - for (String checkpoint : checkpointsSet) { - if (checkpointsToCheck.get(checkpoint)) { - checker.print("\nChecking checkpoint {0}", checkpoint); - - List pathList = checkpointPaths.get(checkpoint); - NodeState root = sns.retrieve(checkpoint); - - if (root == null) { - checker.printError("Checkpoint {0} not found in this revision!", checkpoint); - overallValid = false; - } else { - overallValid = overallValid && checker.checkPathsAtRoot(pathList, root, - journalEntry, checkBinaries); - } - } - } - } - } - - if (overallValid) { - lastValidJournalEntry = journalEntry; - } - } catch (IllegalArgumentException | SegmentNotFoundException e) { - checker.printError("Skipping invalid record id {0}: {1}", revision, e); - } - } - - checker.print("\nSearched through {0} revisions and {1} checkpoints", revisionCount, checkpointsSet.size()); - - if (initialCount == checker.checkCount) { - checker.print("No good revision found"); 
- } else { - if (checkHead) { - checker.print("\nHead"); - checker.printResults(headPaths, NO_INDENT); - } - - if (!checkpointsSet.isEmpty()) { - checker.print("\nCheckpoints"); - - for (String checkpoint : checkpointsSet) { - List pathList = checkpointPaths.get(checkpoint); - checker.print("- {0}", checkpoint); - checker.printResults(pathList, CHECKPOINT_INDENT); - } - } - - checker.print("\nOverall"); - checker.printOverallResults(lastValidJournalEntry); - } - - if (ioStatistics) { - checker.print( - "[I/O] Segment read: Number of operations: {0}", - checker.statisticsIOMonitor.ioOperations - ); - checker.print( - "[I/O] Segment read: Total size: {0} ({1} bytes)", - humanReadableByteCount(checker.statisticsIOMonitor.readBytes.get()), - checker.statisticsIOMonitor.readBytes - ); - checker.print( - "[I/O] Segment read: Total time: {0} ns", - checker.statisticsIOMonitor.readTime - ); - } - } - } - - private void printResults(List pathList, String indent) { - for (PathToCheck ptc : pathList) { - String revision = ptc.journalEntry != null ? ptc.journalEntry.getRevision() : null; - long timestamp = ptc.journalEntry != null ? ptc.journalEntry.getTimestamp() : -1L; - - print("{0}Latest good revision for path {1} is {2} from {3}", indent, ptc.path, - toString(revision), toString(timestamp)); - } - } - - private void printOverallResults(JournalEntry journalEntry) { - String revision = journalEntry != null ? journalEntry.getRevision() : null; - long timestamp = journalEntry != null ? 
journalEntry.getTimestamp() : -1L; - - print("Latest good revision for paths and checkpoints checked is {0} from {1}", toString(revision), toString(timestamp)); - } - private static String toString(String revision) { - if (revision != null) { - return revision; - } else { - return "none"; - } - } - - private static String toString(long timestamp) { - if (timestamp != -1L) { - return getDateTimeInstance().format(new Date(timestamp)); - } else { - return "unknown date"; - } - } - /** * Create a new consistency checker instance * - * @param directory directory containing the tar files - * @param debugInterval number of seconds between printing progress information to - * the console during the full traversal phase. - * @param ioStatistics if {@code true} prints I/O statistics gathered while consistency - * check was performed - * @param outWriter text output stream writer - * @param errWriter text error stream writer + * @param directory directory containing the tar files + * @param debugInterval number of seconds between printing progress + * information to the console during the full traversal + * phase. + * @param ioStatistics if {@code true} prints I/O statistics gathered while + * consistency check was performed + * @param outWriter text output stream writer + * @param errWriter text error stream writer * @throws IOException */ public ConsistencyChecker(File directory, long debugInterval, boolean ioStatistics, PrintWriter outWriter, - PrintWriter errWriter) throws IOException, InvalidFileStoreVersionException { + PrintWriter errWriter) throws IOException, InvalidFileStoreVersionException { FileStoreBuilder builder = fileStoreBuilder(directory); if (ioStatistics) { builder.withIOMonitor(statisticsIOMonitor); @@ -337,235 +108,212 @@ } /** - * Checks for consistency a list of paths, relative to the same root. 
- * - * @param paths paths to check - * @param root root relative to which the paths are retrieved - * @param journalEntry entry containing the current revision checked - * @param checkBinaries if {@code true} full content of binary properties will be scanned - * @return {@code true}, if the whole list of paths is consistent + * Run a full traversal consistency check. + * + * @param directory directory containing the tar files + * @param journalFileName name of the journal file containing the revision + * history + * @param checkBinaries if {@code true} full content of binary properties + * will be scanned + * @param checkHead if {@code true} will check the head + * @param checkpoints collection of checkpoints to be checked + * @param filterPaths collection of repository paths to be checked + * @param ioStatistics if {@code true} prints I/O statistics gathered + * while consistency check was performed */ - private boolean checkPathsAtRoot(List paths, NodeState root, JournalEntry journalEntry, - boolean checkBinaries) { - boolean result = true; - - for (PathToCheck ptc : paths) { - if (ptc.journalEntry == null) { - String corruptPath = checkPathAtRoot(ptc, root, checkBinaries); - - if (corruptPath == null) { - print("Path {0} is consistent", ptc.path); - ptc.journalEntry = journalEntry; - checkCount--; - } else { - result = false; - ptc.corruptPaths.add(corruptPath); + public void checkConsistency( + File directory, + String journalFileName, + boolean checkBinaries, + boolean checkHead, + Set checkpoints, + Set filterPaths, + boolean ioStatistics + ) throws IOException { + try (JournalReader journal = new JournalReader(new LocalJournalFile(directory, journalFileName))) { + ConsistencyCheckerTemplate template = new ConsistencyCheckerTemplate() { + + @Override + JournalReader getJournal() { + return journal; } - } - } - - return result; - } - - /** - * Checks the consistency of the supplied {@code ptc} relative to the given {@code root}. 
- * - * @param ptc path to check, provided there are no corrupt paths. - * @param root root relative to which the path is retrieved - * @param checkBinaries if {@code true} full content of binary properties will be scanned - * @return {@code null}, if the content tree rooted at path (possibly under a checkpoint) - * is consistent in this revision or the path of the first inconsistency otherwise. - */ - private String checkPathAtRoot(PathToCheck ptc, NodeState root, boolean checkBinaries) { - String result = null; - for (String corruptPath : ptc.corruptPaths) { - try { - NodeWrapper wrapper = NodeWrapper.deriveTraversableNodeOnPath(root, corruptPath); - result = checkNode(wrapper.node, wrapper.path, checkBinaries); + @Override + ReadOnlyFileStore getReadOnlyFileStore() { + return store; + } - if (result != null) { - return result; + @Override + Set getCheckpointsToCheck() { + return checkpoints; } - } catch (IllegalArgumentException e) { - debug("Path {0} not found", corruptPath); - } - } - nodeCount = 0; - propertyCount = 0; + @Override + Set getPathFilters() { + return filterPaths; + } - print("Checking {0}", ptc.path); - - try { - NodeWrapper wrapper = NodeWrapper.deriveTraversableNodeOnPath(root, ptc.path); - result = checkNodeAndDescendants(wrapper.node, wrapper.path, checkBinaries); - print("Checked {0} nodes and {1} properties", nodeCount, propertyCount); - - return result; - } catch (IllegalArgumentException e) { - printError("Path {0} not found", ptc.path); - return ptc.path; - } - } - - /** - * Checks the consistency of a node and its properties at the given path. - * - * @param node node to be checked - * @param path path of the node - * @param checkBinaries if {@code true} full content of binary properties will be scanned - * @return {@code null}, if the node is consistent, - * or the path of the first inconsistency otherwise. 
- */ - private String checkNode(NodeState node, String path, boolean checkBinaries) { - try { - debug("Traversing {0}", path); - nodeCount++; - for (PropertyState propertyState : node.getProperties()) { - Type type = propertyState.getType(); - boolean checked = false; - - if (type == BINARY) { - checked = traverse(propertyState.getValue(BINARY), checkBinaries); - } else if (type == BINARIES) { - for (Blob blob : propertyState.getValue(BINARIES)) { - checked = checked | traverse(blob, checkBinaries); + @Override + boolean checkHead() { + return checkHead; + } + + @Override + boolean checkBinaries() { + return checkBinaries; + } + + @Override + void onCheckRevision(String revision) { + print("\nChecking revision {0}", revision); + } + + @Override + void onCheckHead() { + print("\nChecking head\n"); + } + + @Override + void onCheckChekpoints() { + print("\nChecking checkpoints"); + } + + @Override + void onCheckCheckpoint(String checkpoint) { + print("\nChecking checkpoint {0}", checkpoint); + } + + @Override + void onCheckpointNotFoundInRevision(String checkpoint) { + printError("Checkpoint {0} not found in this revision!", checkpoint); + } + + @Override + void onCheckRevisionError(String revision, Exception e) { + printError("Skipping invalid record id {0}: {1}", revision, e); + } + + @Override + void onCheckEnd(int checkedRevisionCount, int checkedCheckpointsCount) { + print("\nSearched through {0} revisions and {1} checkpoints", checkedRevisionCount, checkedCheckpointsCount); + } + + @Override + void onNoRevisionFound() { + print("No good revision found"); + } + + private String timestampToString(long timestamp) { + return getDateTimeInstance().format(new Date(timestamp)); + } + + @Override + void onHeadRevisionsFound(List foundRevisions) { + print("\nHead"); + for (FoundRevision foundRevision : foundRevisions) { + print( + "Latest good revision for path {0} is {1} from {2}", + foundRevision.getPath(), + foundRevision.getRevision().orElse("none"), + 
foundRevision.getTimestamp().map(this::timestampToString).orElse("unknown time") + ); } - } else { - propertyState.getValue(type); - propertyCount++; - checked = true; } - - if (checked) { - debug("Checked {0}/{1}", path, propertyState); + + @Override + void onCheckpointRevisionsFound(Set checkpoints, Map> foundRevisions) { + print("\nCheckpoints"); + for (String checkpoint : checkpoints) { + print("- {0}", checkpoint); + for (FoundRevision foundRevision : foundRevisions.get(checkpoint)) { + print( + " Latest good revision for path {0} is {1} from {2}", + foundRevision.getPath(), + foundRevision.getRevision().orElse("none"), + foundRevision.getTimestamp().map(this::timestampToString).orElse("unknown time") + ); + } + } } - } - - return null; - } catch (RuntimeException | IOException e) { - printError("Error while traversing {0}: {1}", path, e); - return path; - } - } - - /** - * Recursively checks the consistency of a node and its descendants at the given path. - * @param node node to be checked - * @param path path of the node - * @param checkBinaries if {@code true} full content of binary properties will be scanned - * @return {@code null}, if the node is consistent, - * or the path of the first inconsistency otherwise. 
- */ - private String checkNodeAndDescendants(NodeState node, String path, boolean checkBinaries) { - String result = checkNode(node, path, checkBinaries); - if (result != null) { - return result; - } - - try { - for (ChildNodeEntry cne : node.getChildNodeEntries()) { - String childName = cne.getName(); - NodeState child = cne.getNodeState(); - result = checkNodeAndDescendants(child, concat(path, childName), checkBinaries); - if (result != null) { - return result; + + @Override + void onOverallRevisionFound(Optional revisions, Optional timestamp) { + print("\nOverall"); + print( + "Latest good revision for paths and checkpoints checked is {0} from {1}", + revisions.orElse("none"), + timestamp.map(this::timestampToString).orElse("unknown time") + ); } - } - return null; - } catch (RuntimeException e) { - printError("Error while traversing {0}: {1}", path, e.getMessage()); - return path; - } - } - - static class NodeWrapper { - final NodeState node; - final String path; - - NodeWrapper(NodeState node, String path) { - this.node = node; - this.path = path; - } - - static NodeWrapper deriveTraversableNodeOnPath(NodeState root, String path) { - String parentPath = getParentPath(path); - String name = getName(path); - NodeState parent = getNode(root, parentPath); - - if (!denotesRoot(path)) { - if (!parent.hasChildNode(name)) { - throw new IllegalArgumentException("Invalid path: " + path); - } - - return new NodeWrapper(parent.getChildNode(name), path); - } else { - return new NodeWrapper(parent, parentPath); - } - } - } - - static class PathToCheck { - final String path; - final String checkpoint; - - JournalEntry journalEntry; - Set corruptPaths = new LinkedHashSet<>(); - - PathToCheck(String path, String checkpoint) { - this.path = path; - this.checkpoint = checkpoint; - } - - @Override - public int hashCode() { - final int prime = 31; - int result = 1; - result = prime * result + ((checkpoint == null) ? 
0 : checkpoint.hashCode()); - result = prime * result + ((path == null) ? 0 : path.hashCode()); - return result; - } + @Override + void onConsistentPath(String path) { + print("Path {0} is consistent", path); + } - @Override - public boolean equals(Object object) { - if (this == object) { - return true; - } else if (object instanceof PathToCheck) { - PathToCheck that = (PathToCheck) object; - return path.equals(that.path) && checkpoint.equals(that.checkpoint); - } else { - return false; - } - } - } + @Override + void onPathNotFound(String path) { + printError("Path {0} not found", path); + } - private boolean traverse(Blob blob, boolean checkBinaries) throws IOException { - if (checkBinaries && !isExternal(blob)) { - InputStream s = blob.getNewStream(); - try { - byte[] buffer = new byte[8192]; - int l = s.read(buffer, 0, buffer.length); - while (l >= 0) { - l = s.read(buffer, 0, buffer.length); + @Override + void onCheckTree(String path) { + nodeCount = 0; + propertyCount = 0; + print("Checking {0}", path); } - } finally { - s.close(); - } - - propertyCount++; - return true; - } - - return false; - } - private static boolean isExternal(Blob b) { - if (b instanceof SegmentBlob) { - return ((SegmentBlob) b).isExternal(); + @Override + void onCheckTreeEnd() { + print("Checked {0} nodes and {1} properties", nodeCount, propertyCount); + } + + @Override + void onCheckNode(String path) { + debug("Traversing {0}", path); + nodeCount++; + } + + @Override + void onCheckProperty() { + propertyCount++; + } + + @Override + void onCheckPropertyEnd(String path, PropertyState property) { + debug("Checked {0}/{1}", path, property); + } + + @Override + void onCheckNodeError(String path, Exception e) { + printError("Error while traversing {0}: {1}", path, e); + } + + @Override + void onCheckTreeError(String path, Exception e) { + printError("Error while traversing {0}: {1}", path, e.getMessage()); + } + + }; + + template.checkConsistency(); + + if (ioStatistics) { + print( + "[I/O] 
Segment read: Number of operations: {0}", + statisticsIOMonitor.ioOperations.get() + ); + print( + "[I/O] Segment read: Total size: {0} ({1} bytes)", + humanReadableByteCount(statisticsIOMonitor.readBytes.get()), + statisticsIOMonitor.readBytes.get() + ); + print( + "[I/O] Segment read: Total time: {0} ns", + statisticsIOMonitor.readTime.get() + ); + } } - return false; } @Override @@ -573,44 +321,22 @@ store.close(); } - private void print(String format) { - outWriter.println(format); + private void print(String format, Object... arguments) { + outWriter.println(MessageFormat.format(format, arguments)); } - private void print(String format, Object arg) { - outWriter.println(MessageFormat.format(format, arg)); - } - - private void print(String format, Object arg1, Object arg2) { - outWriter.println(MessageFormat.format(format, arg1, arg2)); - } - - private void print(String format, Object arg1, Object arg2, Object arg3, Object arg4) { - outWriter.println(MessageFormat.format(format, arg1, arg2, arg3, arg4)); - } - - private void printError(String format, Object arg) { - errWriter.println(MessageFormat.format(format, arg)); - } - - private void printError(String format, Object arg1, Object arg2) { - errWriter.println(MessageFormat.format(format, arg1, arg2)); + private void printError(String format, Object... args) { + errWriter.println(MessageFormat.format(format, args)); } private long ts; - private void debug(String format, Object arg) { + private void debug(String format, Object... arg) { if (debug()) { print(format, arg); } } - private void debug(String format, Object arg1, Object arg2) { - if (debug()) { - print(format, arg1, arg2); - } - } - private boolean debug() { // Avoid calling System.currentTimeMillis(), which is slow on some systems. 
if (debugInterval == Long.MAX_VALUE) { @@ -627,4 +353,5 @@ return false; } } + } Index: oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tooling/ConsistencyCheckerTemplate.java =================================================================== --- oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tooling/ConsistencyCheckerTemplate.java (nonexistent) +++ oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tooling/ConsistencyCheckerTemplate.java (working copy) @@ -0,0 +1,561 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.jackrabbit.oak.segment.file.tooling; + +import static org.apache.jackrabbit.oak.api.Type.BINARIES; +import static org.apache.jackrabbit.oak.api.Type.BINARY; +import static org.apache.jackrabbit.oak.commons.PathUtils.concat; +import static org.apache.jackrabbit.oak.commons.PathUtils.denotesRoot; +import static org.apache.jackrabbit.oak.commons.PathUtils.getName; +import static org.apache.jackrabbit.oak.commons.PathUtils.getParentPath; +import static org.apache.jackrabbit.oak.spi.state.NodeStateUtils.getNode; + +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Optional; +import java.util.Set; +import java.util.stream.Collectors; + +import com.google.common.collect.Sets; +import org.apache.jackrabbit.oak.api.Blob; +import org.apache.jackrabbit.oak.api.PropertyState; +import org.apache.jackrabbit.oak.api.Type; +import org.apache.jackrabbit.oak.segment.SegmentBlob; +import org.apache.jackrabbit.oak.segment.SegmentNodeStore; +import org.apache.jackrabbit.oak.segment.SegmentNodeStoreBuilders; +import org.apache.jackrabbit.oak.segment.SegmentNotFoundException; +import org.apache.jackrabbit.oak.segment.file.JournalEntry; +import org.apache.jackrabbit.oak.segment.file.JournalReader; +import org.apache.jackrabbit.oak.segment.file.ReadOnlyFileStore; +import org.apache.jackrabbit.oak.spi.state.ChildNodeEntry; +import org.apache.jackrabbit.oak.spi.state.NodeState; + +abstract class ConsistencyCheckerTemplate { + + private static class NodeWrapper { + + final NodeState node; + + final String path; + + NodeWrapper(NodeState node, String path) { + this.node = node; + this.path = path; + } + + static NodeWrapper deriveTraversableNodeOnPath(NodeState root, String path) { + String parentPath = getParentPath(path); + String name = 
getName(path); + NodeState parent = getNode(root, parentPath); + + if (!denotesRoot(path)) { + if (!parent.hasChildNode(name)) { + throw new IllegalArgumentException("Invalid path: " + path); + } + + return new NodeWrapper(parent.getChildNode(name), path); + } else { + return new NodeWrapper(parent, parentPath); + } + } + } + + private static class PathToCheck { + + final String path; + + final String checkpoint; + + JournalEntry journalEntry; + + Set corruptPaths = new LinkedHashSet<>(); + + PathToCheck(String path, String checkpoint) { + this.path = path; + this.checkpoint = checkpoint; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((checkpoint == null) ? 0 : checkpoint.hashCode()); + result = prime * result + ((path == null) ? 0 : path.hashCode()); + return result; + } + + @Override + public boolean equals(Object object) { + if (this == object) { + return true; + } else if (object instanceof PathToCheck) { + PathToCheck that = (PathToCheck) object; + return path.equals(that.path) && checkpoint.equals(that.checkpoint); + } else { + return false; + } + } + } + + static class FoundRevision { + + private final String path; + + private final String revision; + + private final Long timestamp; + + private FoundRevision(String path, String revision, Long timestamp) { + this.path = path; + this.revision = revision; + this.timestamp = timestamp; + } + + public String getPath() { + return path; + } + + public Optional getRevision() { + return Optional.ofNullable(revision); + } + + public Optional getTimestamp() { + return Optional.ofNullable(timestamp); + } + } + + abstract JournalReader getJournal(); + + abstract ReadOnlyFileStore getReadOnlyFileStore(); + + Set getCheckpointsToCheck() { + return Collections.emptySet(); + } + + Set getPathFilters() { + return Collections.emptySet(); + } + + boolean checkHead() { + return false; + } + + boolean checkBinaries() { + return false; + } + + void 
onCheckRevision(String revision) { + // Do nothing. + } + + void onCheckHead() { + // Do nothing. + } + + void onCheckChekpoints() { + // Do nothing. + } + + void onCheckCheckpoint(String checkpoint) { + // Do nothing. + } + + void onCheckpointNotFoundInRevision(String checkpoint) { + // Do nothing. + } + + void onCheckRevisionError(String revision, Exception e) { + // Do nothing. + } + + void onCheckEnd(int checkedRevisionCount, int checkedCheckpointsCount) { + // Do nothing. + } + + void onNoRevisionFound() { + // Do nothing. + } + + void onHeadRevisionsFound(List foundRevisions) { + // Do nothing. + } + + void onCheckpointRevisionsFound(Set checkpoints, Map> foundRevisions) { + // Do nothing. + } + + void onOverallRevisionFound(Optional revisions, Optional timestamp) { + // Do nothing. + } + + void onConsistentPath(String path) { + // Do nothing. + } + + void onPathNotFound(String path) { + // Do nothing. + } + + void onCheckTree(String path) { + // Do nothing. + } + + void onCheckTreeEnd() { + // Do nothing. + } + + void onCheckNode(String path) { + // Do nothing. + } + + void onCheckProperty() { + // Do nothing. + } + + void onCheckPropertyEnd(String path, PropertyState property) { + // Do nothing. + } + + void onCheckNodeError(String path, Exception e) { + // Do nothing. + } + + void onCheckTreeError(String path, Exception e) { + // Do nothing. 
+    }
+
+    /**
+     * Walks the revisions supplied by the journal and, for each one, checks
+     * every configured path filter against the head state and/or the selected
+     * checkpoints. The walk stops as soon as every path has been found
+     * consistent in some revision, or when the journal is exhausted.
+     * <p>
+     * The special checkpoint name {@code "all"} selects every checkpoint
+     * reported by a node store built on top of the read-only file store.
+     * Progress, errors and the final outcome are reported through the
+     * {@code on*} callbacks.
+     *
+     * @throws IOException presumably propagated from the journal or file
+     *         store accessors; no statement in this method throws it directly
+     */
+    public final void checkConsistency() throws IOException {
+        JournalReader journal = getJournal();
+        ReadOnlyFileStore store = getReadOnlyFileStore();
+
+        Set<String> checkpointsSet = Sets.newLinkedHashSet();
+        List<PathToCheck> headPaths = new ArrayList<>();
+        Map<String, List<PathToCheck>> checkpointPaths = new HashMap<>();
+
+        int revisionCount = 0;
+
+        Set<String> initialCheckpointsToCheck = getCheckpointsToCheck();
+        if (!initialCheckpointsToCheck.isEmpty()) {
+            checkpointsSet.addAll(initialCheckpointsToCheck);
+            if (checkpointsSet.remove("all")) {
+                checkpointsSet = Sets.newLinkedHashSet(SegmentNodeStoreBuilders.builder(store).build().checkpoints());
+            }
+        }
+
+        // One PathToCheck per (path, head) and per (path, checkpoint) pair;
+        // checkCount is the number of pairs still waiting for a good revision.
+        int checkCount = 0;
+        for (String path : getPathFilters()) {
+            if (checkHead()) {
+                headPaths.add(new PathToCheck(path, null));
+                checkCount++;
+            }
+
+            for (String checkpoint : checkpointsSet) {
+                checkpointPaths.computeIfAbsent(checkpoint, k -> new ArrayList<>()).add(new PathToCheck(path, checkpoint));
+                checkCount++;
+            }
+        }
+
+        int initialCount = checkCount;
+        JournalEntry lastValidJournalEntry = null;
+
+        while (journal.hasNext() && checkCount > 0) {
+            JournalEntry journalEntry = journal.next();
+            String revision = journalEntry.getRevision();
+
+            try {
+                revisionCount++;
+                store.setRevision(revision);
+                boolean overallValid = true;
+
+                SegmentNodeStore sns = SegmentNodeStoreBuilders.builder(store).build();
+
+                onCheckRevision(revision);
+
+                if (checkHead()) {
+                    // Only paths without a recorded journal entry are still unresolved.
+                    boolean mustCheck = headPaths.stream().anyMatch(p -> p.journalEntry == null);
+
+                    if (mustCheck) {
+                        onCheckHead();
+                        NodeState root = sns.getRoot();
+                        boolean result = true;
+                        for (PathToCheck ptc : headPaths) {
+                            if (ptc.journalEntry == null) {
+                                String corruptPath = checkPathAtRoot(ptc, root, checkBinaries());
+
+                                if (corruptPath == null) {
+                                    onConsistentPath(ptc.path);
+                                    ptc.journalEntry = journalEntry;
+                                    checkCount--;
+                                } else {
+                                    result = false;
+                                    ptc.corruptPaths.add(corruptPath);
+                                }
+                            }
+                        }
+                        overallValid = overallValid && result;
+                    }
+                }
+
+                if (!checkpointsSet.isEmpty()) {
+                    // checkpoint -> true when at least one of its paths is still unresolved
+                    Map<String, Boolean> checkpointsToCheck = checkpointPaths.entrySet().stream().collect(Collectors.toMap(
+                            Map.Entry::getKey, e -> e.getValue().stream().anyMatch(p -> p.journalEntry == null)));
+                    boolean mustCheck = checkpointsToCheck.values().stream().anyMatch(v -> v);
+
+                    if (mustCheck) {
+                        onCheckChekpoints();
+
+                        for (String checkpoint : checkpointsSet) {
+                            if (checkpointsToCheck.get(checkpoint)) {
+                                onCheckCheckpoint(checkpoint);
+
+                                List<PathToCheck> pathList = checkpointPaths.get(checkpoint);
+                                NodeState root = sns.retrieve(checkpoint);
+
+                                if (root == null) {
+                                    onCheckpointNotFoundInRevision(checkpoint);
+                                    overallValid = false;
+                                } else {
+                                    boolean result = true;
+                                    for (PathToCheck ptc : pathList) {
+                                        if (ptc.journalEntry == null) {
+                                            String corruptPath = checkPathAtRoot(ptc, root, checkBinaries());
+
+                                            if (corruptPath == null) {
+                                                onConsistentPath(ptc.path);
+                                                ptc.journalEntry = journalEntry;
+                                                checkCount--;
+                                            } else {
+                                                result = false;
+                                                ptc.corruptPaths.add(corruptPath);
+                                            }
+                                        }
+                                    }
+                                    overallValid = overallValid && result;
+                                }
+                            }
+                        }
+                    }
+                }
+
+                if (overallValid) {
+                    lastValidJournalEntry = journalEntry;
+                }
+            } catch (IllegalArgumentException | SegmentNotFoundException e) {
+                onCheckRevisionError(revision, e);
+            }
+        }
+
+        onCheckEnd(revisionCount, checkpointsSet.size());
+
+        // An unchanged counter means no path was found consistent in any revision.
+        if (initialCount == checkCount) {
+            onNoRevisionFound();
+        } else {
+            if (checkHead()) {
+                onHeadRevisionsFound(toFoundRevisionsList(headPaths));
+            }
+            if (!checkpointsSet.isEmpty()) {
+                onCheckpointRevisionsFound(checkpointsSet, toFoundRevisionsMap(checkpointPaths));
+            }
+            if (lastValidJournalEntry == null) {
+                onOverallRevisionFound(Optional.empty(), Optional.empty());
+            } else {
+                onOverallRevisionFound(Optional.of(lastValidJournalEntry.getRevision()), Optional.of(lastValidJournalEntry.getTimestamp()));
+            }
+        }
+    }
+
+    /**
+     * Converts checked paths into {@code FoundRevision} results, in the same
+     * order. A path without a recorded journal entry yields a result whose
+     * revision and timestamp are both {@code null}.
+     *
+     * @param pathsToCheck the paths that were checked
+     * @return one result per supplied path
+     */
+    private List<FoundRevision> toFoundRevisionsList(List<PathToCheck> pathsToCheck) {
+        List<FoundRevision> foundRevisions = new ArrayList<>(pathsToCheck.size());
+        for (PathToCheck pathToCheck : pathsToCheck) {
+            if (pathToCheck.journalEntry == null) {
+                foundRevisions.add(new FoundRevision(pathToCheck.path, null, null));
+            } else {
+                foundRevisions.add(new FoundRevision(pathToCheck.path, pathToCheck.journalEntry.getRevision(), pathToCheck.journalEntry.getTimestamp()));
+            }
+        }
+        return foundRevisions;
+    }
+
+    /**
+     * Applies {@link #toFoundRevisionsList(List)} to every value of the
+     * supplied map, keeping the keys.
+     *
+     * @param pathsToCheck checked paths, keyed by checkpoint
+     * @return the results for each key of the supplied map
+     */
+    private Map<String, List<FoundRevision>> toFoundRevisionsMap(Map<String, List<PathToCheck>> pathsToCheck) {
+        Map<String, List<FoundRevision>> foundRevisions = new HashMap<>();
+        for (Entry<String, List<PathToCheck>> entry : pathsToCheck.entrySet()) {
+            foundRevisions.put(entry.getKey(), toFoundRevisionsList(entry.getValue()));
+        }
+        return foundRevisions;
+    }
+
+    /**
+     * Checks the consistency of the supplied {@code ptc} relative to the given
+     * {@code root}.
+     *
+     * @param ptc           path to check, provided there are no corrupt paths.
+     * @param root          root relative to which the path is retrieved
+     * @param checkBinaries if {@code true} full content of binary properties
+     *                      will be scanned
+     * @return {@code null}, if the content tree rooted at path (possibly under
+     * a checkpoint) is consistent in this revision or the path of the first
+     * inconsistency otherwise.
+     */
+    private String checkPathAtRoot(PathToCheck ptc, NodeState root, boolean checkBinaries) {
+        String result;
+
+        // Re-check the paths already recorded as corrupt first: if one of them
+        // is still broken in this revision the full traversal can be skipped.
+        for (String corruptPath : ptc.corruptPaths) {
+            try {
+                NodeWrapper wrapper = NodeWrapper.deriveTraversableNodeOnPath(root, corruptPath);
+                result = checkNode(wrapper.node, wrapper.path, checkBinaries);
+
+                if (result != null) {
+                    return result;
+                }
+            } catch (IllegalArgumentException e) {
+                onPathNotFound(corruptPath);
+            }
+        }
+
+        onCheckTree(ptc.path);
+
+        try {
+            NodeWrapper wrapper = NodeWrapper.deriveTraversableNodeOnPath(root, ptc.path);
+            result = checkNodeAndDescendants(wrapper.node, wrapper.path, checkBinaries);
+            onCheckTreeEnd();
+
+            return result;
+        } catch (IllegalArgumentException e) {
+            onPathNotFound(ptc.path);
+            return ptc.path;
+        }
+    }
+
+    /**
+     * Checks the consistency of a node and its properties at the given path.
+     *
+     * @param node          node to be checked
+     * @param path          path of the node
+     * @param checkBinaries if {@code true} full content of binary properties
+     *                      will be scanned
+     * @return {@code null}, if the node is consistent, or the path of the first
+     * inconsistency otherwise.
+     */
+    private String checkNode(NodeState node, String path, boolean checkBinaries) {
+        try {
+            onCheckNode(path);
+            for (PropertyState propertyState : node.getProperties()) {
+                Type<?> type = propertyState.getType();
+                boolean checked = false;
+
+                if (type == BINARY) {
+                    checked = traverse(propertyState.getValue(BINARY), checkBinaries);
+                } else if (type == BINARIES) {
+                    for (Blob blob : propertyState.getValue(BINARIES)) {
+                        // Deliberately non-short-circuit: every blob must be traversed.
+                        checked |= traverse(blob, checkBinaries);
+                    }
+                } else {
+                    propertyState.getValue(type);
+                    onCheckProperty();
+                    checked = true;
+                }
+
+                if (checked) {
+                    onCheckPropertyEnd(path, propertyState);
+                }
+            }
+
+            return null;
+        } catch (RuntimeException | IOException e) {
+            onCheckNodeError(path, e);
+            return path;
+        }
+    }
+
+    /**
+     * Recursively checks the consistency of a node and its descendants at the
+     * given path.
+     *
+     * @param node          node to be checked
+     * @param path          path of the node
+     * @param checkBinaries if {@code true} full content of binary properties
+     *                      will be scanned
+     * @return {@code null}, if the node is consistent, or the path of the first
+     * inconsistency otherwise.
+     */
+    private String checkNodeAndDescendants(NodeState node, String path, boolean checkBinaries) {
+        String result = checkNode(node, path, checkBinaries);
+        if (result != null) {
+            return result;
+        }
+
+        try {
+            for (ChildNodeEntry cne : node.getChildNodeEntries()) {
+                String childName = cne.getName();
+                NodeState child = cne.getNodeState();
+                result = checkNodeAndDescendants(child, concat(path, childName), checkBinaries);
+                if (result != null) {
+                    return result;
+                }
+            }
+
+            return null;
+        } catch (RuntimeException e) {
+            onCheckTreeError(path, e);
+            return path;
+        }
+    }
+
+    /**
+     * Reads the whole content of a blob when binary checking is enabled and
+     * the blob is not external, discarding the bytes read.
+     *
+     * @param blob          blob to read
+     * @param checkBinaries if {@code false} the blob is not read at all
+     * @return {@code true} if the blob was read to the end, {@code false} if
+     * it was skipped
+     * @throws IOException if reading or closing the blob stream fails
+     */
+    private boolean traverse(Blob blob, boolean checkBinaries) throws IOException {
+        if (checkBinaries && !isExternal(blob)) {
+            try (InputStream s = blob.getNewStream()) {
+                byte[] buffer = new byte[8192];
+                int l = s.read(buffer, 0, buffer.length);
+                while (l >= 0) {
+                    l = s.read(buffer, 0, buffer.length);
+                }
+            }
+
+            onCheckProperty();
+            return true;
+        }
+
+        return false;
+    }
+
+    /**
+     * Tells whether a blob is a {@code SegmentBlob} that reports itself as
+     * external; any other kind of blob is treated as not external.
+     *
+     * @param b blob to inspect
+     * @return {@code true} only for an external {@code SegmentBlob}
+     */
+    private static boolean isExternal(Blob b) {
+        if (b instanceof SegmentBlob) {
+            return ((SegmentBlob) b).isExternal();
+        }
+        return false;
+    }
+
+}

Property changes on: oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tooling/ConsistencyCheckerTemplate.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/tool/Check.java
===================================================================
--- oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/tool/Check.java (revision 1847187)
+++ oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/tool/Check.java (working copy)
@@ -239,18 +239,15 @@
     }
 
     public int run() {
-        try {
-            ConsistencyChecker.checkConsistency(
+        try (ConsistencyChecker checker = new ConsistencyChecker(path, debugInterval, ioStatistics, outWriter, errWriter)) {
+            checker.checkConsistency(
                 path,
                 journal,
-                debugInterval,
                 checkBinaries,
                 checkHead,
                 checkpoints,
                 filterPaths,
-                ioStatistics,
-                outWriter,
-                errWriter
+                ioStatistics
             );
             return 0;
         } catch (Exception e) {