Index: oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
===================================================================
--- oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java	(revision 1862053)
+++ oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java	(working copy)
@@ -19,6 +19,20 @@
 
 package org.apache.jackrabbit.oak.plugins.document.mongo;
 
+import static com.google.common.collect.Iterables.concat;
+import static com.google.common.collect.Iterables.filter;
+import static com.google.common.collect.Iterables.transform;
+import static java.util.Collections.emptyList;
+import static org.apache.jackrabbit.oak.plugins.document.Collection.NODES;
+import static org.apache.jackrabbit.oak.plugins.document.Document.ID;
+import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.DELETED_ONCE;
+import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MODIFIED_IN_SECS;
+import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.PATH;
+import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SD_MAX_REV_TIME_IN_SECS;
+import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SD_TYPE;
+import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.getModifiedInSecs;
+import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType.DEFAULT_NO_BRANCH;
+
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Set;
@@ -25,19 +39,9 @@
 import java.util.concurrent.TimeUnit;
 import java.util.regex.Pattern;
 
-import com.google.common.base.Function;
-import com.google.common.base.Joiner;
-import com.google.common.base.Predicate;
-import com.google.common.base.StandardSystemProperty;
-import com.google.common.collect.Lists;
-import com.mongodb.BasicDBObject;
-import com.mongodb.Block;
-import com.mongodb.client.FindIterable;
-import com.mongodb.client.MongoCollection;
-import com.mongodb.client.model.Filters;
-
 import org.apache.jackrabbit.oak.plugins.document.Document;
 import org.apache.jackrabbit.oak.plugins.document.NodeDocument;
+import org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType;
 import org.apache.jackrabbit.oak.plugins.document.Revision;
 import org.apache.jackrabbit.oak.plugins.document.RevisionVector;
 import org.apache.jackrabbit.oak.plugins.document.SplitDocumentCleanUp;
@@ -51,19 +55,16 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import static com.google.common.collect.Iterables.filter;
-import static com.google.common.collect.Iterables.transform;
-import static java.util.Collections.singletonList;
-import static org.apache.jackrabbit.oak.plugins.document.Collection.NODES;
-import static org.apache.jackrabbit.oak.plugins.document.Document.ID;
-import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.DELETED_ONCE;
-import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MODIFIED_IN_SECS;
-import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.PATH;
-import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SD_MAX_REV_TIME_IN_SECS;
-import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SD_TYPE;
-import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType;
-import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType.DEFAULT_NO_BRANCH;
-import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.getModifiedInSecs;
+import com.google.common.base.Function;
+import com.google.common.base.Joiner;
+import com.google.common.base.Predicate;
+import com.google.common.base.StandardSystemProperty;
+import com.google.common.collect.Lists;
+import com.mongodb.BasicDBObject;
+import com.mongodb.Block;
+import com.mongodb.client.FindIterable;
+import com.mongodb.client.MongoCollection;
+import com.mongodb.client.model.Filters;
 
 /**
  * Mongo specific version of VersionGCSupport which uses mongo queries
@@ -122,19 +123,33 @@
     protected Iterable<NodeDocument> identifyGarbage(final Set<SplitDocType> gcTypes,
                                                      final RevisionVector sweepRevs,
                                                      final long oldestRevTimeStamp) {
-        return filter(transform(getNodeCollection().find(
-                createQuery(gcTypes, sweepRevs, oldestRevTimeStamp)),
-                new Function<BasicDBObject, NodeDocument>() {
-            @Override
-            public NodeDocument apply(BasicDBObject input) {
-                return store.convertFromDBObject(NODES, input);
-            }
-        }), new Predicate<NodeDocument>() {
-            @Override
-            public boolean apply(NodeDocument input) {
-                return !isDefaultNoBranchSplitNewerThan(input, sweepRevs);
-            }
-        });
+        // With OAK-8351 this switched from 1 to 2 queries (see createQueries)
+        // hence we iterate over the queries returned by createQueries
+        List<Bson> queries = createQueries(gcTypes, sweepRevs, oldestRevTimeStamp);
+        Iterable<NodeDocument> allResults = emptyList();
+        for (Bson query : queries) {
+            // this query uses a timeout of 15min. hitting the timeout will
+            // result in an exception which should show up in the log file.
+            // while this doesn't resolve the situation (the restructuring
+            // of the query as part of OAK-8351 does), it nevertheless
+            // makes any future similar problem more visible than long running
+            // queries alone (15min is still long).
+            Iterable<NodeDocument> iterable = filter(transform(getNodeCollection().find(query)
+                    .maxTime(15, TimeUnit.MINUTES),
+                    new Function<BasicDBObject, NodeDocument>() {
+                @Override
+                public NodeDocument apply(BasicDBObject input) {
+                    return store.convertFromDBObject(NODES, input);
+                }
+            }), new Predicate<NodeDocument>() {
+                @Override
+                public boolean apply(NodeDocument input) {
+                    return !isDefaultNoBranchSplitNewerThan(input, sweepRevs);
+                }
+            });
+            allResults = concat(allResults, iterable);
+        }
+        return allResults;
     }
 
     @Override
@@ -162,32 +177,33 @@
         return result.get(0);
     }
 
-    private Bson createQuery(Set<SplitDocType> gcTypes,
+    private List<Bson> createQueries(Set<SplitDocType> gcTypes,
                              RevisionVector sweepRevs,
                              long oldestRevTimeStamp) {
-        List<Integer> gcTypeCodes = Lists.newArrayList();
+        List<Bson> result = Lists.newArrayList();
         List<Bson> orClauses = Lists.newArrayList();
         for(SplitDocType type : gcTypes) {
-            gcTypeCodes.add(type.typeCode());
-            for (Bson query : queriesForType(type, sweepRevs)) {
-                orClauses.add(query);
+            if (DEFAULT_NO_BRANCH != type) {
+                orClauses.add(Filters.eq(SD_TYPE, type.typeCode()));
+            } else {
+                result.add(queryForDefaultNoBranch(sweepRevs, getModifiedInSecs(oldestRevTimeStamp)));
             }
         }
-        return Filters.and(
-                Filters.in(SD_TYPE, gcTypeCodes),
+        // OAK-8351: this (last) query only contains SD_TYPE and SD_MAX_REV_TIME_IN_SECS
+        // so mongodb should really use that _sdType_1__sdMaxRevTime_1 index
+        result.add(Filters.and(
                 Filters.or(orClauses),
                 Filters.lt(SD_MAX_REV_TIME_IN_SECS, getModifiedInSecs(oldestRevTimeStamp))
-        );
+        ));
+
+        return result;
     }
 
     @NotNull
-    private Iterable<Bson> queriesForType(SplitDocType type, RevisionVector sweepRevs) {
-        if (type != DEFAULT_NO_BRANCH) {
-            return singletonList(Filters.eq(SD_TYPE, type.typeCode()));
-        }
+    private Bson queryForDefaultNoBranch(RevisionVector sweepRevs, long maxRevTimeInSecs) {
         // default_no_branch split type is special because we can
         // only remove those older than sweep rev
-        List<Bson> queries = Lists.newArrayList();
+        List<Bson> orClauses = Lists.newArrayList();
         for (Revision r : sweepRevs) {
             String idSuffix = Utils.getPreviousIdFor("/", r, 0);
             idSuffix = idSuffix.substring(idSuffix.lastIndexOf('-'));
@@ -202,13 +218,16 @@
                     )
             );
 
-            queries.add(Filters.and(
-                    Filters.eq(SD_TYPE, type.typeCode()),
+            orClauses.add(Filters.and(
                     idPathClause,
                     Filters.lt(SD_MAX_REV_TIME_IN_SECS, getModifiedInSecs(r.getTimestamp()))
             ));
         }
-        return queries;
+        return Filters.and(
+                Filters.eq(SD_TYPE, DEFAULT_NO_BRANCH.typeCode()),
+                Filters.lt(SD_MAX_REV_TIME_IN_SECS, maxRevTimeInSecs),
+                Filters.or(orClauses)
+        );
     }
 
     private void logSplitDocIdsTobeDeleted(Bson query) {
@@ -257,15 +276,21 @@
 
         @Override
         protected int deleteSplitDocuments() {
-            Bson query = createQuery(gcTypes, sweepRevs, oldestRevTimeStamp);
+            List<Bson> queries = createQueries(gcTypes, sweepRevs, oldestRevTimeStamp);
 
             if(LOG.isDebugEnabled()){
                 //if debug level logging is on then determine the id of documents to be deleted
                 //and log them
-                logSplitDocIdsTobeDeleted(query);
+                for (Bson query : queries) {
+                    logSplitDocIdsTobeDeleted(query);
+                }
             }
 
-            return (int) getNodeCollection().deleteMany(query).getDeletedCount();
+            int cnt = 0;
+            for (Bson query : queries) {
+                cnt += getNodeCollection().deleteMany(query).getDeletedCount();
+            }
+            return cnt;
         }
     }
 }
Index: oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSplitDocTest.java
===================================================================
--- oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSplitDocTest.java	(nonexistent)
+++ oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSplitDocTest.java	(working copy)
@@ -0,0 +1,345 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.document;
+
+import static java.util.concurrent.TimeUnit.HOURS;
+import static java.util.concurrent.TimeUnit.MINUTES;
+import static org.apache.jackrabbit.oak.commons.FixturesHelper.getFixtures;
+import static org.apache.jackrabbit.oak.commons.FixturesHelper.Fixture.DOCUMENT_MEM;
+import static org.apache.jackrabbit.oak.commons.FixturesHelper.Fixture.DOCUMENT_NS;
+import static org.apache.jackrabbit.oak.commons.FixturesHelper.Fixture.DOCUMENT_RDB;
+import static org.apache.jackrabbit.oak.plugins.document.Collection.NODES;
+import static org.apache.jackrabbit.oak.plugins.document.TestUtils.NO_BINARY;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.List;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.jackrabbit.oak.api.CommitFailedException;
+import org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.VersionGCStats;
+import org.apache.jackrabbit.oak.plugins.document.util.Utils;
+import org.apache.jackrabbit.oak.spi.commit.CommitInfo;
+import org.apache.jackrabbit.oak.spi.commit.EmptyHook;
+import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
+import org.apache.jackrabbit.oak.stats.Clock;
+import org.junit.After;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+import com.google.common.collect.Lists;
+
+@RunWith(Parameterized.class)
+public class VersionGCSplitDocTest {
+
+    @Rule
+    public final DocumentMKBuilderProvider builderProvider = new DocumentMKBuilderProvider();
+
+    private DocumentStoreFixture fixture;
+
+    private ExecutorService execService;
+
+    private DocumentStore store;
+
+    private DocumentNodeStore ns;
+
+    private VersionGarbageCollector gc;
+
+    private String longpath;
+
+    private Clock clock;
+
+    public VersionGCSplitDocTest(DocumentStoreFixture fixture) {
+        this.fixture = fixture;
+    }
+
+    @Parameterized.Parameters(name = "{0}")
+    public static Collection<Object[]> fixtures() throws IOException {
+        List<Object[]> fixtures = Lists.newArrayList();
+        DocumentStoreFixture mongo = new DocumentStoreFixture.MongoFixture();
+        if (getFixtures().contains(DOCUMENT_NS) && mongo.isAvailable()) {
+            fixtures.add(new Object[] { mongo });
+        }
+
+        DocumentStoreFixture rdb = new DocumentStoreFixture.RDBFixture();
+        if (getFixtures().contains(DOCUMENT_RDB) && rdb.isAvailable()) {
+            fixtures.add(new Object[] { rdb });
+        }
+        if (fixtures.isEmpty() || getFixtures().contains(DOCUMENT_MEM)) {
+            fixtures.add(new Object[] { new DocumentStoreFixture.MemoryFixture() });
+        }
+
+        return fixtures;
+    }
+
+    @Before
+    public void setUp() throws Exception {
+        StringBuffer longpath = new StringBuffer();
+        while (longpath.length() < 380) {
+            longpath.append("thisisaverylongpath");
+        }
+        this.longpath = longpath.toString();
+
+        clock = new Clock.Virtual();
+        store = fixture.createDocumentStore();
+        if (fixture.getName().equals("MongoDB")) {
+            MongoUtils.dropCollections(MongoUtils.DB);
+        }
+
+        execService = Executors.newCachedThreadPool();
+        clock.waitUntil(System.currentTimeMillis());
+        Revision.setClock(clock);
+
+        ns = builderProvider.newBuilder().clock(clock).setLeaseCheckMode(LeaseCheckMode.DISABLED)
+                .setDocumentStore(store).setAsyncDelay(0).getNodeStore();
+        gc = ns.getVersionGarbageCollector();
+    }
+
+    private void createDefaultNoBranchSplitDocument(DocumentNodeStore ns, String parent) throws CommitFailedException {
+        NodeBuilder builder = ns.getRoot().builder();
+        builder.child("createNoBranchSplitDocument" + longpath).child(parent).child("bar");
+        merge(ns, builder);
+
+        for (int i = 0; i < 5; i++) {
+            builder = ns.getRoot().builder();
+            builder.child("createNoBranchSplitDocument" + longpath).child(parent).setProperty("p", "value-" + i);
+            merge(ns, builder);
+        }
+        ns.runBackgroundOperations();
+        String id = Utils.getIdFromPath("/" + "createNoBranchSplitDocument" + longpath + "/" + parent);
+        NodeDocument doc = store.find(NODES, id);
+        assertNotNull(doc);
+        for (UpdateOp op : SplitOperations.forDocument(doc, ns, ns.getHeadRevision(), NO_BINARY, 5)) {
+            ns.getDocumentStore().createOrUpdate(NODES, op);
+        }
+    }
+
+    private void createCommitOnlyAndNoChildSplitDocument(DocumentNodeStore ns, String parent1, String parent2,
+            String child) throws CommitFailedException {
+        NodeBuilder b1 = ns.getRoot().builder();
+        b1.child("createCommitOnlyAndNoChildSplitDocument" + longpath).child(parent1).child(child).child("bar");
+        b1.child("createCommitOnlyAndNoChildSplitDocument" + longpath).child(parent2).child(child);
+        ns.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+
+        //Commit on a node which has a child and where the commit root
+        // is parent
+        for (int i = 0; i < NodeDocument.NUM_REVS_THRESHOLD; i++) {
+            b1 = ns.getRoot().builder();
+            //This updates a middle node i.e. one which has child bar
+            //Should result in SplitDoc of type PROP_COMMIT_ONLY
+            b1.child("createCommitOnlyAndNoChildSplitDocument" + longpath).child(parent1).child(child)
+                    .setProperty("prop", i);
+
+            //This should result in SplitDoc of type DEFAULT_NO_CHILD
+            b1.child("createCommitOnlyAndNoChildSplitDocument" + longpath).child(parent2).child(child)
+                    .setProperty("prop", i);
+            ns.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+        }
+    }
+
+    private void createCommitOnlySplitDocument(DocumentNodeStore ns, String parent1, String parent2, String child)
+            throws CommitFailedException {
+        NodeBuilder b1 = ns.getRoot().builder();
+        b1.child("createCommitOnlySplitDocument" + longpath).child(parent1).child(child).child("bar");
+        b1.child("createCommitOnlySplitDocument" + longpath).child(parent2).child(child);
+        ns.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+
+        //Commit on a node which has a child and where the commit root
+        // is parent
+        for (int i = 0; i < 2 * NodeDocument.NUM_REVS_THRESHOLD; i++) {
+            b1 = ns.getRoot().builder();
+            //This updates a middle node i.e. one which has child bar
+            //Should result in SplitDoc of type PROP_COMMIT_ONLY
+            b1.child("createCommitOnlySplitDocument" + longpath).child(parent1).child(child).setProperty("prop", i);
+
+            b1.child("createCommitOnlySplitDocument" + longpath).child("child-" + i);
+            ns.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+        }
+    }
+
+    private void createDefaultLeafSplitDocument(DocumentNodeStore ns, String parent1, String parent2, String child)
+            throws CommitFailedException {
+        NodeBuilder b1 = ns.getRoot().builder();
+        b1.child("createDefaultLeafSplitDocument" + longpath).child(parent1).child(child).child("bar");
+        b1.child("createDefaultLeafSplitDocument" + longpath).child(parent2).child(child);
+        ns.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+
+        //Commit on a node which has a child and where the commit root
+        // is parent
+        for (int i = 0; i < NodeDocument.NUM_REVS_THRESHOLD; i++) {
+            //This should result in SplitDoc of type DEFAULT_NO_CHILD (aka DEFAULT_LEAF)
+            b1 = ns.getRoot().builder();
+            b1.child("createDefaultLeafSplitDocument" + longpath).child(parent2).child(child).setProperty("prop", i);
+            ns.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+        }
+    }
+
+    @After
+    public void tearDown() throws Exception {
+        execService.shutdown();
+        execService.awaitTermination(1, MINUTES);
+    }
+
+    @AfterClass
+    public static void resetClock() {
+        Revision.resetClockToDefault();
+    }
+
+    private Future<VersionGCStats> gc() {
+        // run gc in a separate thread
+        return execService.submit(new Callable<VersionGCStats>() {
+            @Override
+            public VersionGCStats call() throws Exception {
+                return gc.gc(1, TimeUnit.MILLISECONDS);
+            }
+        });
+    }
+
+    private void merge(DocumentNodeStore store, NodeBuilder builder) throws CommitFailedException {
+        store.merge(builder, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+    }
+
+    @Test
+    public void emptyGC() throws Exception {
+        assertEquals(0, gc().get().splitDocGCCount);
+    }
+
+    private int countNodeDocuments() {
+        return store.query(NODES, NodeDocument.MIN_ID_VALUE, NodeDocument.MAX_ID_VALUE, Integer.MAX_VALUE).size();
+    }
+
+    private int countStalePrev() {
+        int cnt = 0;
+        List<NodeDocument> nodes = store.query(NODES, NodeDocument.MIN_ID_VALUE, NodeDocument.MAX_ID_VALUE,
+                Integer.MAX_VALUE);
+        for (NodeDocument nodeDocument : nodes) {
+            cnt += nodeDocument.getStalePrev().size();
+        }
+        return cnt;
+    }
+
+    @Test
+    public void commitOnlyAndNoChild() throws Exception {
+        createCommitOnlyAndNoChildSplitDocument(ns, "parent1", "parent2", "child");
+
+        // perform a change to make sure the sweep rev will be newer than
+        clock.waitUntil(clock.getTime() + TimeUnit.SECONDS.toMillis(NodeDocument.MODIFIED_IN_SECS_RESOLUTION * 2));
+        NodeBuilder builder = ns.getRoot().builder();
+        builder.child("qux");
+        ns.merge(builder, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+
+        ns.runBackgroundOperations();
+
+        // wait one hour
+        clock.waitUntil(clock.getTime() + HOURS.toMillis(1));
+
+        int nodesBeforeGc = countNodeDocuments();
+        assertEquals(0, countStalePrev());
+        final VersionGCStats stats = gc().get();
+        int nodesAfterGc = countNodeDocuments();
+        assertEquals(3, countStalePrev());
+        assertEquals(3, nodesBeforeGc - nodesAfterGc);
+        assertEquals(3, stats.splitDocGCCount);
+    }
+
+    @Test
+    public void commitOnly() throws Exception {
+        createCommitOnlySplitDocument(ns, "parent1", "parent2", "child");
+
+        // perform a change to make sure the sweep rev will be newer than
+        clock.waitUntil(clock.getTime() + TimeUnit.SECONDS.toMillis(NodeDocument.MODIFIED_IN_SECS_RESOLUTION * 2));
+        NodeBuilder builder = ns.getRoot().builder();
builder.child("qux"); + ns.merge(builder, EmptyHook.INSTANCE, CommitInfo.EMPTY); + + ns.runBackgroundOperations(); + + // wait one hour + clock.waitUntil(clock.getTime() + HOURS.toMillis(1)); + + int nodesBeforeGc = countNodeDocuments(); + assertEquals(0, countStalePrev()); + final VersionGCStats stats = gc().get(); + int nodesAfterGc = countNodeDocuments(); + System.out.println("before gc : " + nodesBeforeGc + ", after gc : " + nodesAfterGc); + assertTrue(countStalePrev() >= 1); + assertTrue(nodesBeforeGc - nodesAfterGc >= 1); + assertTrue(stats.splitDocGCCount >= 1); + } + + @Test + public void defaultLeaf() throws Exception { + createDefaultLeafSplitDocument(ns, "parent1", "parent2", "child"); + + // perform a change to make sure the sweep rev will be newer than + clock.waitUntil(clock.getTime() + TimeUnit.SECONDS.toMillis(NodeDocument.MODIFIED_IN_SECS_RESOLUTION * 2)); + NodeBuilder builder = ns.getRoot().builder(); + builder.child("qux"); + ns.merge(builder, EmptyHook.INSTANCE, CommitInfo.EMPTY); + + ns.runBackgroundOperations(); + + // wait one hour + clock.waitUntil(clock.getTime() + HOURS.toMillis(1)); + + int nodesBeforeGc = countNodeDocuments(); + assertEquals(0, countStalePrev()); + final VersionGCStats stats = gc().get(); + int nodesAfterGc = countNodeDocuments(); + assertEquals(1, countStalePrev()); + assertEquals(1, nodesBeforeGc - nodesAfterGc); + assertEquals(1, stats.splitDocGCCount); + } + + @Test + public void defaultNoBranch() throws Exception { + createDefaultNoBranchSplitDocument(ns, "aparent"); + + // perform a change to make sure the sweep rev will be newer than + clock.waitUntil(clock.getTime() + TimeUnit.SECONDS.toMillis(NodeDocument.MODIFIED_IN_SECS_RESOLUTION * 2)); + NodeBuilder builder = ns.getRoot().builder(); + builder.child("qux"); + ns.merge(builder, EmptyHook.INSTANCE, CommitInfo.EMPTY); + + // wait one hour + clock.waitUntil(clock.getTime() + HOURS.toMillis(1)); + + ns.runBackgroundOperations(); + + int nodesBeforeGc = countNodeDocuments(); + assertEquals(0, countStalePrev()); + final VersionGCStats stats = gc().get(); + int nodesAfterGc = countNodeDocuments(); + assertEquals(1, countStalePrev()); + assertEquals(1, nodesBeforeGc - nodesAfterGc); + assertEquals(1, stats.splitDocGCCount); + } + +} Property changes on: oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSplitDocTest.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property