diff --git a/oak-run/README.md b/oak-run/README.md index 2f8c3f9..1eb7b02 100644 --- a/oak-run/README.md +++ b/oak-run/README.md @@ -563,6 +563,8 @@ The following operations are available: --collect-garbage - Execute garbage collection on the data store. If only mark phase to be run specify a true parameter. --check-consistency - List all the missing blobs by doing a consistency check. + --dump-ref - List all the blob references in the node store + --dump-id - List all the ids in the data store The following options are available: @@ -582,6 +584,10 @@ The following options are available: use comma as a delimiter). This option is NOT available for the collect-garbage operation. If specified with collect-garbage, the command execution will throw an exception. + --verbosePathInclusionRegex - A Regex that can be used to limit the scan during traversal to a specific inclusion list of nodes identified by the regex. + For example, to look for blob references under specific paths such as /b1/b2/foo, /c1/c2/foo under the rootPath /a + use --verboseRootPath /a --verbosePathInclusionRegex /*/*/foo + This option is only available when --verboseRootPath is used. - Path to the tar segment store or the segment azure uri as specified in http://jackrabbit.apache.org/oak/docs/nodestore/segment/overview.html#remote-segment-stores or if Mongo NodeStore then the mongo uri. 
diff --git a/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreCheckCommand.java b/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreCheckCommand.java index 056f0f3..726fe14 100644 --- a/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreCheckCommand.java +++ b/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreCheckCommand.java @@ -518,9 +518,10 @@ public class DataStoreCheckCommand implements Command { Iterator iterator = p.getValue(Type.BINARIES).iterator(); while (iterator.hasNext()) { - id = iterator.next().getContentIdentity(); + Blob blob = iterator.next(); + id = blob.getContentIdentity(); // Ignore inline encoded binaries in document mk - if (id == null || p.getValue(Type.BINARY).isInlined()) { + if (id == null || blob.isInlined()) { continue; } writeAsLine(writer, diff --git a/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreCommand.java b/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreCommand.java index d635152..2a75a6a 100644 --- a/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreCommand.java +++ b/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreCommand.java @@ -16,6 +16,7 @@ */ package org.apache.jackrabbit.oak.run; +import java.io.BufferedWriter; import java.io.File; import java.io.IOException; import java.lang.management.ManagementFactory; @@ -23,22 +24,27 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.Comparator; +import java.util.HashMap; import java.util.Iterator; import java.util.List; +import java.util.Map; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.function.Function; import com.google.common.base.Joiner; import com.google.common.base.Splitter; +import com.google.common.base.Stopwatch; +import com.google.common.io.Closeables; import com.google.common.io.Closer; +import com.google.common.io.Files; import joptsimple.OptionParser; import 
org.apache.commons.io.FileUtils; +import org.apache.commons.io.LineIterator; import org.apache.commons.io.filefilter.FileFilterUtils; import org.apache.jackrabbit.oak.api.Blob; import org.apache.jackrabbit.oak.api.PropertyState; import org.apache.jackrabbit.oak.api.Type; -import org.apache.jackrabbit.oak.commons.FileIOUtils; import org.apache.jackrabbit.oak.commons.PathUtils; import org.apache.jackrabbit.oak.commons.concurrent.ExecutorCloser; import org.apache.jackrabbit.oak.commons.io.BurnOnCloseFileIterator; @@ -70,7 +76,12 @@ import org.slf4j.LoggerFactory; import static com.google.common.base.Charsets.UTF_8; import static com.google.common.base.Preconditions.checkNotNull; import static com.google.common.base.StandardSystemProperty.FILE_SEPARATOR; +import static com.google.common.base.Stopwatch.createStarted; import static java.util.concurrent.TimeUnit.SECONDS; +import static org.apache.jackrabbit.oak.commons.FileIOUtils.sort; +import static org.apache.jackrabbit.oak.commons.FileIOUtils.writeAsLine; +import static org.apache.jackrabbit.oak.commons.FileIOUtils.writeStrings; +import static org.apache.jackrabbit.oak.commons.sort.EscapeUtils.escapeLineBreak; import static org.apache.jackrabbit.oak.run.cli.BlobStoreOptions.Type.AZURE; import static org.apache.jackrabbit.oak.run.cli.BlobStoreOptions.Type.FAKE; import static org.apache.jackrabbit.oak.run.cli.BlobStoreOptions.Type.FDS; @@ -86,10 +97,19 @@ public class DataStoreCommand implements Command { public static final String NAME = "datastore"; private static final String summary = "Provides DataStore management operations"; + private static final String DELIM = ","; private Options opts; private DataStoreOptions dataStoreOpts; + private static final Comparator idComparator = new Comparator() { + @Override + public int compare(String s1, String s2) { + return s1.split(DELIM)[0].compareTo(s2.split(DELIM)[0]); + } + }; + + @Override public void execute(String... 
args) throws Exception { OptionParser parser = new OptionParser(); @@ -155,11 +175,76 @@ public class DataStoreCommand implements Command { private void execute(NodeStoreFixture fixture, DataStoreOptions dataStoreOpts, Options opts, Closer closer) throws Exception { + final BlobStoreOptions optionBean = opts.getOptionBean(BlobStoreOptions.class); try (Closer metricsCloser = Utils.createCloserWithShutdownHook()) { MetricsExporterFixture metricsExporterFixture = MetricsExporterFixtureProvider.create(dataStoreOpts, fixture.getWhiteboard()); metricsCloser.register(metricsExporterFixture); + if (dataStoreOpts.dumpRefs()) { + final File referencesTemp = File.createTempFile("traverseref", null, new File(opts.getTempDirectory())); + final BufferedWriter writer = Files.newWriter(referencesTemp, UTF_8); + + boolean threw = true; + try { + BlobReferenceRetriever retriever = getRetriever(fixture, dataStoreOpts, opts); + + retriever.collectReferences(new ReferenceCollector() { + @Override + public void addReference(String blobId, String nodeId) { + try { + Iterator idIter = ((GarbageCollectableBlobStore) fixture.getBlobStore()).resolveChunks(blobId); + + while (idIter.hasNext()) { + String id = idIter.next(); + final Joiner delimJoiner = Joiner.on(DELIM).skipNulls(); + // If --verbose is present, convert blob ID to a backend friendly format and + // concat the path that has the ref. Otherwise simply add the ID to the o/p file + // as it is. + String line = dataStoreOpts.isVerbose() ? 
VerboseIdLogger.encodeId(delimJoiner.join(id, + escapeLineBreak(nodeId)), + optionBean.getBlobStoreType()) : id; + writeAsLine(writer, line, true); + } + } catch (Exception e) { + throw new RuntimeException("Error in retrieving references", e); + } + } + }); + + writer.flush(); + threw = false; + + sort(referencesTemp, idComparator); + + File parent = new File(dataStoreOpts.getOutDir().getAbsolutePath(), "dump"); + long startTime = System.currentTimeMillis(); + final File references = new File(parent, "dump-ref-" + startTime); + FileUtils.forceMkdir(parent); + + FileUtils.copyFile(referencesTemp, references); + } finally { + Closeables.close(writer, threw); + } + + } else if (dataStoreOpts.dumpIds()) { + final File blobidsTemp = File.createTempFile("blobidstemp", null, new File(opts.getTempDirectory())); + + retrieveBlobIds((GarbageCollectableBlobStore) fixture.getBlobStore(), blobidsTemp); + + File parent = new File(dataStoreOpts.getOutDir().getAbsolutePath(), "dump"); + long startTime = System.currentTimeMillis(); + final File ids = new File(parent, "dump-id-" + startTime); + FileUtils.forceMkdir(parent); + + + if (dataStoreOpts.isVerbose()) { + verboseIds(optionBean, blobidsTemp, ids); + } else { + FileUtils.copyFile(blobidsTemp, ids); + } + + } else { MarkSweepGarbageCollector collector = getCollector(fixture, dataStoreOpts, opts, closer); if (dataStoreOpts.checkConsistency()) { long missing = collector.checkConsistency(); @@ -173,6 +258,7 @@ public class DataStoreCommand implements Command { } } } + } private static void setupDirectories(DataStoreOptions opts) throws IOException { if (opts.getOutDir().exists()) { @@ -184,34 +270,65 @@ public class DataStoreCommand implements Command { private static MarkSweepGarbageCollector getCollector(NodeStoreFixture fixture, DataStoreOptions dataStoreOpts, Options opts, Closer closer) throws IOException { + BlobReferenceRetriever retriever = getRetriever(fixture, dataStoreOpts, opts); + + ExecutorService service = 
Executors.newSingleThreadExecutor(); + closer.register(new ExecutorCloser(service)); + + String repositoryId = ClusterRepositoryInfo.getId(fixture.getStore()); + checkNotNull(repositoryId); + + MarkSweepGarbageCollector collector = + new MarkSweepGarbageCollector(retriever, (GarbageCollectableBlobStore) fixture.getBlobStore(), service, + dataStoreOpts.getOutDir().getAbsolutePath(), dataStoreOpts.getBatchCount(), + SECONDS.toMillis(dataStoreOpts.getBlobGcMaxAgeInSecs()), dataStoreOpts.checkConsistencyAfterGC(), + repositoryId, fixture.getWhiteboard(), getService(fixture.getWhiteboard(), StatisticsProvider.class)); + collector.setTraceOutput(true); + + return collector; + } + + private static BlobReferenceRetriever getRetriever(NodeStoreFixture fixture, DataStoreOptions dataStoreOpts, Options opts) { BlobReferenceRetriever retriever; if (opts.getCommonOpts().isDocument() && !dataStoreOpts.hasVerboseRootPaths()) { retriever = new DocumentBlobReferenceRetriever((DocumentNodeStore) fixture.getStore()); } else { if (dataStoreOpts.isVerbose()) { List rootPathList = dataStoreOpts.getVerboseRootPaths(); + List roothPathInclusionRegex = dataStoreOpts.getVerboseInclusionRegex(); retriever = new NodeTraverserReferenceRetriever(fixture.getStore(), - (String[]) rootPathList.toArray(new String[rootPathList.size()])); + rootPathList.toArray(new String[rootPathList.size()]), + roothPathInclusionRegex.toArray(new String[roothPathInclusionRegex.size()])); } else { ReadOnlyFileStore fileStore = getService(fixture.getWhiteboard(), ReadOnlyFileStore.class); retriever = new SegmentBlobReferenceRetriever(fileStore); } } + return retriever; + } - ExecutorService service = Executors.newSingleThreadExecutor(); - closer.register(new ExecutorCloser(service)); + private static void retrieveBlobIds(GarbageCollectableBlobStore blobStore, File blob) + throws Exception { - String repositoryId = ClusterRepositoryInfo.getId(fixture.getStore()); - checkNotNull(repositoryId); + 
System.out.println("Starting dump of blob ids"); + Stopwatch watch = createStarted(); - MarkSweepGarbageCollector collector = - new MarkSweepGarbageCollector(retriever, (GarbageCollectableBlobStore) fixture.getBlobStore(), service, - dataStoreOpts.getOutDir().getAbsolutePath(), dataStoreOpts.getBatchCount(), - SECONDS.toMillis(dataStoreOpts.getBlobGcMaxAgeInSecs()), dataStoreOpts.checkConsistencyAfterGC(), - repositoryId, fixture.getWhiteboard(), getService(fixture.getWhiteboard(), StatisticsProvider.class)); - collector.setTraceOutput(true); + Iterator blobIter = blobStore.getAllChunkIds(0); + int count = writeStrings(blobIter, blob, false); - return collector; + sort(blob); + System.out.println(count + " blob ids found"); + System.out.println("Finished in " + watch.elapsed(SECONDS) + " seconds"); + } + + private static void verboseIds(BlobStoreOptions blobOpts, File readFile, File writeFile) throws IOException { + LineIterator idIterator = FileUtils.lineIterator(readFile, UTF_8.name()); + + try (BurnOnCloseFileIterator iterator = + new BurnOnCloseFileIterator(idIterator, readFile, + (Function) input -> VerboseIdLogger.encodeId(input, blobOpts.getBlobStoreType()))) { + writeStrings(iterator, writeFile, true, log, "Transformed to verbose ids - "); + } } protected static void setupLogging(DataStoreOptions dataStoreOpts) throws IOException { @@ -240,10 +357,16 @@ public class DataStoreCommand implements Command { static class NodeTraverserReferenceRetriever implements BlobReferenceRetriever { private final NodeStore nodeStore; private final String[] paths; + private final String[] inclusionRegex; + + public NodeTraverserReferenceRetriever(NodeStore nodeStore) { + this(nodeStore, null, null); + } - public NodeTraverserReferenceRetriever(NodeStore nodeStore, String ... 
paths) { + public NodeTraverserReferenceRetriever(NodeStore nodeStore, String[] paths, String[] inclusionRegex) { this.nodeStore = nodeStore; this.paths = paths; + this.inclusionRegex = inclusionRegex; } private void binaryProperties(NodeState state, String path, ReferenceCollector collector) { @@ -251,14 +374,15 @@ public class DataStoreCommand implements Command { String propPath = path;//PathUtils.concat(path, p.getName()); if (p.getType() == Type.BINARY) { String blobId = p.getValue(Type.BINARY).getContentIdentity(); - if (blobId != null) { + if (blobId != null && !p.getValue(Type.BINARY).isInlined()) { collector.addReference(blobId, propPath); } } else if (p.getType() == Type.BINARIES && p.count() > 0) { Iterator iterator = p.getValue(Type.BINARIES).iterator(); while (iterator.hasNext()) { - String blobId = iterator.next().getContentIdentity(); - if (blobId != null) { + Blob blob = iterator.next(); + String blobId = blob.getContentIdentity(); + if (blobId != null && !blob.isInlined()) { collector.addReference(blobId, propPath); } } @@ -275,7 +399,7 @@ public class DataStoreCommand implements Command { @Override public void collectReferences(ReferenceCollector collector) throws IOException { log.info("Starting dump of blob references by traversing"); - if (paths.length == 0) { + if (paths == null || paths.length == 0) { traverseChildren(nodeStore.getRoot(), "/", collector); } else { for (String path: paths) { @@ -284,10 +408,55 @@ public class DataStoreCommand implements Command { for (String node: nodeList) { state = state.getChildNode(node); } + + if (inclusionRegex == null || inclusionRegex.length == 0) { traverseChildren(state, path, collector); + } else { + for (String regex : inclusionRegex) { + Map inclusionMap = new HashMap(); + getInclusionListFromRegex(state, path, regex, inclusionMap); + if (inclusionMap.size() == 0 ) { + System.out.println("No valid paths found for traversal, " + + "for the inclusion Regex " + regex + " under the path " + path); + 
continue; + } + for(NodeState s : inclusionMap.keySet()) { + traverseChildren(s, inclusionMap.get(s), collector); + } + } } + + } + } + + + } + + private void getInclusionListFromRegex(NodeState rootState, String rootPath, String inclusionRegex, Map inclusionNodeStates) { + Splitter delimSplitter = Splitter.on("/").trimResults().omitEmptyStrings(); + List pathElementList = delimSplitter.splitToList(inclusionRegex); + + Joiner delimJoiner = Joiner.on("/").skipNulls(); + + // Get the first pathElement from the regexPath + String pathElement = pathElementList.get(0); + // If the pathElement == *, get all child nodes and scan under them for the rest of the regex + if ("*".equals(pathElement)) { + for (String nodeName : rootState.getChildNodeNames()) { + String rootPathTemp = PathUtils.concat(rootPath, nodeName); + // Remove the current Path Element from the regexPath + // and recurse on getInclusionListFromRegex with this childNodeState and the regexPath + // under the current pathElement + String sub = delimJoiner.join(pathElementList.subList(1, pathElementList.size())); + getInclusionListFromRegex(rootState.getChildNode(nodeName), rootPathTemp, sub, inclusionNodeStates); + } + } else { + NodeState rootStateToInclude = rootState.getChildNode(pathElement); + if (rootStateToInclude.exists()) { + inclusionNodeStates.put(rootStateToInclude, PathUtils.concat(rootPath, pathElement)); } + } } } @@ -317,6 +486,7 @@ public class DataStoreCommand implements Command { outFileList.add(filterFiles(outDir, "marked-")); outFileList.add(filterFiles(outDir, "gccand-")); + outFileList.removeAll(Collections.singleton(null)); if (outFileList.size() == 0) { @@ -324,15 +494,19 @@ public class DataStoreCommand implements Command { } } + static File filterFiles(File outDir, String filePrefix) { + return filterFiles(outDir, "gcworkdir-", filePrefix); + } + @Nullable - static File filterFiles(File outDir, String prefix) { + static File filterFiles(File outDir, String dirPrefix, String 
filePrefix) { List subDirs = FileFilterUtils.filterList(FileFilterUtils - .and(FileFilterUtils.prefixFileFilter("gcworkdir-"), FileFilterUtils.directoryFileFilter()), + .and(FileFilterUtils.prefixFileFilter(dirPrefix), FileFilterUtils.directoryFileFilter()), outDir.listFiles()); if (subDirs != null && !subDirs.isEmpty()) { File workDir = subDirs.get(0); - List outFiles = FileFilterUtils.filterList(FileFilterUtils.prefixFileFilter(prefix), workDir.listFiles()); + List outFiles = FileFilterUtils.filterList(FileFilterUtils.prefixFileFilter(filePrefix), workDir.listFiles()); if (outFiles != null && !outFiles.isEmpty()) { return outFiles.get(0); @@ -342,20 +516,54 @@ return null; } + /** + * Encode the blob id/blob ref in a format understood by the backing datastore + * + * Example: + * b47b58169f121822cd4a...#123311,/a/b/c => b47b-58169f121822cd4a...,/a/b/c (dsType = S3 or Azure) + * b47b58169f121822cd4a...#123311 => b47b-58169f121822cd4a... (dsType = S3 or Azure) + * + * @param line can be either of the format b47b...#12311,/a/b/c or + * b47b...#12311 + * @param dsType + * @return In case of ref dump, concatenated encoded blob ref in a + * format understood by backing datastore impl and the path + * on which ref is present separated by delimJoiner + * In case of id dump, just the encoded blob ids. + */ static String encodeId(String line, BlobStoreOptions.Type dsType) { + // Split the input line on ",". 
This would be the case while dumping refs along with paths + // Line would be like b47b58169f121822cd4a0a0a153ba5910e581ad2bc450b6af7e51e6214c2b173#123311,/a/b/c + // In case of dumping ids, there would not be any paths associated and there the line would simply be + // b47b58169f121822cd4a0a0a153ba5910e581ad2bc450b6af7e51e6214c2b173#123311 List list = delimSplitter.splitToList(line); String id = list.get(0); + // Split b47b58169f121822cd4a0a0a153ba5910e581ad2bc450b6af7e51e6214c2b173#123311 on # to get the id List idLengthSepList = Splitter.on(HASH).trimResults().omitEmptyStrings().splitToList(id); String blobId = idLengthSepList.get(0); if (dsType == FAKE || dsType == FDS) { + // 0102030405... => 01/02/03/0102030405... blobId = (blobId.substring(0, 2) + FILE_SEPARATOR.value() + blobId.substring(2, 4) + FILE_SEPARATOR.value() + blobId .substring(4, 6) + FILE_SEPARATOR.value() + blobId); } else if (dsType == S3 || dsType == AZURE) { + //b47b58169f121822cd4a0... => b47b-58169f121822cd4a0... 
blobId = (blobId.substring(0, 4) + DASH + blobId.substring(4)); } + + // Check if the line provided as input was a line dumped from blob refs or blob ids + // In case of blob refs dump, the list size would be 2 (Consisting of blob ref and the path on which ref is present) + // In case of blob ids dump, the list size would be 1 (Consisting of just the id) + if (list.size() > 1) { + // Join back the encoded blob ref and the path on which the ref is present return delimJoiner.join(blobId, EscapeUtils.unescapeLineBreaks(list.get(1))); + } else { + // return the encoded blob id + return blobId; + } + + } public void log() throws IOException { @@ -366,7 +574,7 @@ public class DataStoreCommand implements Command { try (BurnOnCloseFileIterator iterator = new BurnOnCloseFileIterator(FileUtils.lineIterator(tempFile, UTF_8.toString()), tempFile, (Function) input -> encodeId(input, blobStoreType))) { - FileIOUtils.writeStrings(iterator, outFile, true, log, "Transformed to verbose ids - "); + writeStrings(iterator, outFile, true, log, "Transformed to verbose ids - "); } } } diff --git a/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreOptions.java b/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreOptions.java index b604d25..ca52390 100644 --- a/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreOptions.java +++ b/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreOptions.java @@ -41,6 +41,8 @@ public class DataStoreOptions implements OptionsBean { private final OptionSpec outputDirOpt; private final OptionSpec collectGarbage; private final OptionSpec consistencyCheck; + private final OptionSpec refOp; + private final OptionSpec idOp; private final OptionSpec checkConsistencyAfterGC; private final OptionSpec batchCount; private OptionSet options; @@ -49,6 +51,7 @@ public class DataStoreOptions implements OptionsBean { private final OptionSpec blobGcMaxAgeInSecs; private final OptionSpec verbose; private final OptionSpec 
verboseRootPath; + private final OptionSpec verbosePathInclusionRegex; private final OptionSpec resetLoggingConfig; private OptionSpec exportMetrics; private static final String DELIM = ","; @@ -66,6 +69,10 @@ public class DataStoreOptions implements OptionsBean { consistencyCheck = parser.accepts("check-consistency", "Performs a consistency check on the repository/datastore defined"); + refOp = parser.accepts("dump-ref", "Gets a dump of Blob References"); + + idOp = parser.accepts("dump-id", "Gets a dump of Blob Ids"); + blobGcMaxAgeInSecs = parser.accepts("max-age", "") .withRequiredArg().ofType(Long.class).defaultsTo(86400L); batchCount = parser.accepts("batch", "Batch count") @@ -79,10 +86,16 @@ public class DataStoreOptions implements OptionsBean { verbose = parser.accepts("verbose", "Option to get all the paths and implementation specific blob ids"); - // Option NOT available for garbage collection operation - we throw an exception if both --collect-garbage and + // Option NOT available for garbage collection operation - we throw an + // exception if both --collect-garbage and // --verboseRootPath are provided in the command. verboseRootPath = parser.accepts("verboseRootPath", - "Root path to output backend formatted ids/paths").availableUnless(collectGarbage).availableIf(verbose).withRequiredArg().withValuesSeparatedBy(DELIM).ofType(String.class); + "Root path to output backend formatted ids/paths").availableUnless(collectGarbage).availableIf(verbose) + .withRequiredArg().withValuesSeparatedBy(DELIM).ofType(String.class); + + verbosePathInclusionRegex = parser.accepts("verbosePathInclusionRegex", "Regex to provide an inclusion list for " + + "nodes that will be scanned under the path provided with the option --verboseRootPath").availableIf(verboseRootPath). 
+ withRequiredArg().withValuesSeparatedBy(DELIM).ofType(String.class); resetLoggingConfig = parser.accepts("reset-log-config", "Reset logging config for testing purposes only").withOptionalArg() @@ -91,7 +104,7 @@ public class DataStoreOptions implements OptionsBean { "type, URI to export the metrics and optional metadata all delimeted by semi-colon(;)").withRequiredArg(); //Set of options which define action - actionOpts = ImmutableSet.of(collectGarbage, consistencyCheck); + actionOpts = ImmutableSet.of(collectGarbage, consistencyCheck, idOp, refOp); operationNames = collectionOperationNames(actionOpts); } @@ -147,6 +160,14 @@ public class DataStoreOptions implements OptionsBean { return options.has(consistencyCheck); } + public boolean dumpRefs() { + return options.has(refOp); + } + + public boolean dumpIds() { + return options.has(idOp); + } + public boolean checkConsistencyAfterGC() { return options.has(checkConsistencyAfterGC) && checkConsistencyAfterGC.value(options) ; } @@ -171,6 +192,10 @@ public class DataStoreOptions implements OptionsBean { return options.has(verboseRootPath); } + public boolean hasVerboseInclusionRegex() { + return options.has(verbosePathInclusionRegex); + } + public boolean isResetLoggingConfig() { return resetLoggingConfig.value(options); } @@ -195,4 +220,8 @@ public class DataStoreOptions implements OptionsBean { return options.valuesOf(verboseRootPath); } + public List getVerboseInclusionRegex() { + return options.valuesOf(verbosePathInclusionRegex); + } + } diff --git a/oak-run/src/test/java/org/apache/jackrabbit/oak/run/DataStoreCommandTest.java b/oak-run/src/test/java/org/apache/jackrabbit/oak/run/DataStoreCommandTest.java index 63d3e38..5a08c8d 100644 --- a/oak-run/src/test/java/org/apache/jackrabbit/oak/run/DataStoreCommandTest.java +++ b/oak-run/src/test/java/org/apache/jackrabbit/oak/run/DataStoreCommandTest.java @@ -26,6 +26,7 @@ import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; import 
java.util.Date; +import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; @@ -159,6 +160,11 @@ public class DataStoreCommandTest { private static Data prepareData(StoreFixture storeFixture, DataStoreFixture blobFixture, int numBlobs, int numMaxDeletions, int missingDataStore) throws Exception{ + return prepareData(storeFixture, blobFixture, numBlobs, numMaxDeletions, missingDataStore, false); + } + + private static Data prepareData(StoreFixture storeFixture, DataStoreFixture blobFixture, int numBlobs, + int numMaxDeletions, int missingDataStore, boolean createMultiLevelNodes) throws Exception { DataStoreBlobStore blobStore = blobFixture.getDataStore(); NodeStore store = storeFixture.getNodeStore(); @@ -176,27 +182,54 @@ public class DataStoreCommandTest { } NodeBuilder a = store.getRoot().builder(); + Map map = new HashMap(); + map.put("/",a); + if (createMultiLevelNodes) { + NodeBuilder a1 = a.child("foo"); + NodeBuilder a2 = a.child("bar"); + + for (int j = 0 ; j < 5 ; j ++) { + NodeBuilder a11 = a1.child("foo" + j).child("test"); + map.put("/foo/foo" + j + "/test", a11); + NodeBuilder a22 = a2.child("bar" + j).child("test"); + map.put("/bar/bar" + j + "/test", a22); + } + } for (int i = 0; i < numBlobs; i++) { + List valuesList = new ArrayList(map.keySet()); + int randomIndex = new Random().nextInt(valuesList.size()); + + + String pathRoot = valuesList.get(randomIndex); + Blob b = store.createBlob(randomStream(i, 18342)); Iterator idIter = blobStore.resolveChunks(b.getContentIdentity()); while (idIter.hasNext()) { String chunk = idIter.next(); data.added.add(chunk); - data.idToPath.put(chunk, "/c" + i); - if (toBeDeleted.contains(i)) { + data.idToPath.put(chunk, (createMultiLevelNodes ? 
pathRoot : "") + "/c" + i); + if (!createMultiLevelNodes && toBeDeleted.contains(i)) { data.deleted.add(chunk); } } + if (createMultiLevelNodes) { + map.get(pathRoot).child("c" + i).setProperty("x", b); + } else { a.child("c" + i).setProperty("x", b); } + } store.merge(a, EmptyHook.INSTANCE, CommitInfo.EMPTY); log.info("Created Data : {}", data); + // Not deleting nodes in case of multi level nodes + // Probably change/improve this in future. + if (!createMultiLevelNodes) { for (int id : toBeDeleted) { delete("c" + id, store); } log.info("Deleted nodes : {}", toBeDeleted.size()); + } int missing = 0; Iterator iterator = data.added.iterator(); @@ -391,6 +424,95 @@ public class DataStoreCommandTest { } @Test + public void testDumpRefWithVerboseRootPath() throws Exception { + File dump = temporaryFolder.newFolder(); + Data data = prepareData(storeFixture, blobFixture, 10, 4, 1); + storeFixture.close(); + additionalParams += " --verboseRootPath /c1,/c2"; + + + for (String id : data.idToPath.keySet()) { + if (data.idToPath.get(id).equals("/c1") || data.idToPath.get(id).equals("/c2")) { + data.addedSubset.add(id); + } + } + testDumpRef(dump, data, true, true); + } + + @Test + public void testDumpRef() throws Exception { + File dump = temporaryFolder.newFolder(); + Data data = prepareData(storeFixture, blobFixture, 10, 4, 1); + storeFixture.close(); + + testDumpRef(dump, data, true, false); + } + + @Test + public void testDumpRefWithoutVerbose() throws Exception { + File dump = temporaryFolder.newFolder(); + Data data = prepareData(storeFixture, blobFixture, 10, 4, 1); + storeFixture.close(); + + testDumpRef(dump, data, false, false); + } + + @Test + public void testDumpRefVerboseRootPathRegex() throws Exception { + File dump = temporaryFolder.newFolder(); + Data data = prepareData(storeFixture, blobFixture, 10, 4, 1, true); + storeFixture.close(); + + additionalParams += " --verboseRootPath /foo --verbosePathInclusionRegex /*/test"; + + for (String id : 
data.idToPath.keySet()) { + if (data.idToPath.get(id).contains("/foo")) { + data.addedSubset.add(id); + } + } + + testDumpRef(dump, data, true, true); + } + + @Test + public void testDumpRefVerboseRegexWithoutVerboseRootPath() throws Exception { + File dump = temporaryFolder.newFolder(); + Data data = prepareData(storeFixture, blobFixture, 10, 4, 1); + storeFixture.close(); + + additionalParams += " --verbosePathInclusionRegex /*/*"; + + try { + // Create exec command without --verboseRootPath - It should throw an exception + // since we used --verbosePathInclusionRegex without --verboseRootPath + testDumpRef(dump, data, true, false); + } catch (OptionException e) { + assertTrue(e.getMessage().equals("Option(s) [verbosePathInclusionRegex] are unavailable " + + "given other options on the command line")); + return; + } + assertFalse(true); + } + + @Test + public void testDumpId() throws Exception { + File dump = temporaryFolder.newFolder(); + Data data = prepareData(storeFixture, blobFixture, 10, 4, 1); + storeFixture.close(); + + testDumpIds(dump, data, true); + } + + @Test + public void testDumpIdWithoutVerbose() throws Exception { + File dump = temporaryFolder.newFolder(); + Data data = prepareData(storeFixture, blobFixture, 10, 4, 1); + storeFixture.close(); + + testDumpIds(dump, data, false); + } + + @Test public void testConsistencyNoMissing() throws Exception { File dump = temporaryFolder.newFolder(); Data data = prepareData(storeFixture, blobFixture, 10, 5, 0); @@ -577,6 +699,51 @@ public class DataStoreCommandTest { data.missingDataStore); } + private void testDumpRef(File dump, Data data, boolean verbose, boolean verboseRootPath) throws Exception { + List argsList = Lists + .newArrayList("--dump-ref", "--" + getOption(blobFixture.getType()), blobFixture.getConfigPath(), + storeFixture.getConnectionString(), "--out-dir", dump.getAbsolutePath(), "--work-dir", + temporaryFolder.newFolder().getAbsolutePath()); + if (!Strings.isNullOrEmpty(additionalParams)) { 
+ argsList.addAll(Splitter.on(" ").splitToList(additionalParams)); + } + + if (verbose) { + argsList.add("--verbose"); + } + DataStoreCommand cmd = new DataStoreCommand(); + cmd.execute(argsList.toArray(new String[0])); + + // Verbose would have paths as well as ids changed, otherwise only ids would be listed as it is. + assertFileEquals(dump, "dump-ref-", verbose ? + encodedIdsAndPath(Sets.difference(verboseRootPath ? data.addedSubset : + data.added, data.deleted), blobFixture.getType(), data.idToPath, true) : + Sets.difference(data.added, data.deleted), "dump"); + + } + + private void testDumpIds(File dump, Data data, boolean verbose) throws Exception { + List argsList = Lists + .newArrayList("--dump-id", "--" + getOption(blobFixture.getType()), blobFixture.getConfigPath(), + storeFixture.getConnectionString(), "--out-dir", dump.getAbsolutePath(), "--work-dir", + temporaryFolder.newFolder().getAbsolutePath()); + if (!Strings.isNullOrEmpty(additionalParams)) { + argsList.addAll(Splitter.on(" ").splitToList(additionalParams)); + } + + if (verbose) { + argsList.add("--verbose"); + } + DataStoreCommand cmd = new DataStoreCommand(); + cmd.execute(argsList.toArray(new String[0])); + + // Verbose would have backend friendly encoded ids + assertFileEquals(dump, "dump-id-", verbose ? 
+ encodeIds(Sets.difference(data.added, data.missingDataStore), blobFixture.getType()) : + Sets.difference(data.added, data.missingDataStore), "dump"); + + } + private void testGc(File dump, Data data, long maxAge, boolean markOnly) throws Exception { List argsList = Lists .newArrayList("--collect-garbage", String.valueOf(markOnly), "--max-age", String.valueOf(maxAge), @@ -655,9 +822,13 @@ public class DataStoreCommandTest { return idMapping; } - private static void assertFileEquals(File dump, String prefix, Set blobsAdded) + private static void assertFileEquals(File dump, String prefix, Set blobsAdded) throws IOException{ + assertFileEquals(dump, prefix, blobsAdded, null); + } + + private static void assertFileEquals(File dump, String prefix, Set blobsAdded, String dirPrefix) throws IOException { - File file = filterFiles(dump, prefix); + File file = (dirPrefix == null) ? filterFiles(dump, prefix) : filterFiles(dump, dirPrefix, prefix); Assert.assertNotNull(file); Assert.assertTrue(file.exists()); assertEquals(blobsAdded, @@ -702,6 +873,15 @@ public class DataStoreCommandTest { })); } + private static Set encodeIds(Set ids, Type dsOption) { + return Sets.newHashSet(Iterators.transform(ids.iterator(), new Function() { + @Nullable @Override public String apply(@Nullable String input) { + return encodeId(input, dsOption); + } + })); + } + + static String encodeId(String id, Type dsType) { List idLengthSepList = Splitter.on(HASH).trimResults().omitEmptyStrings().splitToList(id); String blobId = idLengthSepList.get(0); @@ -728,10 +908,16 @@ public class DataStoreCommandTest { } static class Data { + // Set of blob ids created private Set added; + // Mapping of id to the path on which their ref exists private Map idToPath; + // Set of deleted paths private Set deleted; + // Set of blobs that will be deleted from datastore private Set missingDataStore; + // A subset of added blobs - use this to test the --verboseRootPath option + // (In case of getting the blob refs only 
under a specific path) private Set addedSubset; public Data() {