Index: oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java =================================================================== --- oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java (Revision 1781354) +++ oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java (Arbeitskopie) @@ -38,6 +38,7 @@ import com.google.common.base.Predicate; import com.google.common.base.Stopwatch; import com.google.common.collect.Iterators; +import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Sets; @@ -115,6 +116,7 @@ boolean ignoredGCDueToCheckPoint; boolean canceled; int deletedDocGCCount; + int deletedLeafDocGCCount; int splitDocGCCount; int intermediateSplitDocGCCount; final Stopwatch collectDeletedDocs = Stopwatch.createUnstarted(); @@ -127,7 +129,7 @@ return "VersionGCStats{" + "ignoredGCDueToCheckPoint=" + ignoredGCDueToCheckPoint + ", canceled=" + canceled + - ", deletedDocGCCount=" + deletedDocGCCount + + ", deletedDocGCCount=" + deletedDocGCCount + " (of which leaf: " + deletedLeafDocGCCount + ")" + ", splitDocGCCount=" + splitDocGCCount + ", intermediateSplitDocGCCount=" + intermediateSplitDocGCCount + ", timeToCollectDeletedDocs=" + collectDeletedDocs + @@ -138,6 +140,78 @@ } } + private enum GCPhase { + NONE, + COLLECTING, + DELETING, + SORTING, + SPLITS_CLEANUP + } + + /** + * Keeps track of timers when switching GC phases. + * + * Could be merged with VersionGCStats, however this way the public class is kept unchanged. + */ + private static class GCPhases { + + final VersionGCStats stats; + final Stopwatch elapsed; + private final List phases = Lists.newArrayList(); + private final Map watches = Maps.newHashMap(); + + GCPhases() { + this.stats = new VersionGCStats(); + this.elapsed = Stopwatch.createStarted(); + this.watches.put(GCPhase.NONE, Stopwatch.createStarted()); + this.watches.put(GCPhase.COLLECTING, stats.collectDeletedDocs); + this.watches.put(GCPhase.DELETING, stats.deleteDeletedDocs); + this.watches.put(GCPhase.SORTING, stats.sortDocIds); + this.watches.put(GCPhase.SPLITS_CLEANUP, stats.collectAndDeleteSplitDocs); + } + + public void start(GCPhase started) { + suspend(currentWatch()); + this.phases.add(started); + resume(currentWatch()); + } + + public void stop(GCPhase phase) { + if (!phases.isEmpty() && phase == phases.get(phases.size() - 1)) { + suspend(currentWatch()); + phases.remove(phases.size() - 1); + resume(currentWatch()); + } + } + + public void close() { + while (!phases.isEmpty()) { + suspend(currentWatch()); + phases.remove(phases.size() - 1); + } + this.elapsed.stop(); + } + + private GCPhase current() { + return phases.isEmpty()? GCPhase.NONE : phases.get(phases.size() - 1); + } + + private Stopwatch currentWatch() { + return watches.get(current()); + } + + private void resume(Stopwatch w) { + if (!w.isRunning()) { + w.start(); + } + } + private void suspend(Stopwatch w) { + if (w.isRunning()) { + w.stop(); + } + } + } + private class GCJob { private final long maxRevisionAgeMillis; @@ -157,8 +231,7 @@ } private VersionGCStats gc(long maxRevisionAgeInMillis) throws IOException { - Stopwatch sw = Stopwatch.createStarted(); - VersionGCStats stats = new VersionGCStats(); + GCPhases phases = new GCPhases(); final long oldestRevTimeStamp = nodeStore.getClock().getTime() - maxRevisionAgeInMillis; final RevisionVector headRevision = nodeStore.getHeadRevision(); @@ -173,26 +246,26 @@ checkpoint.toReadableString(), Utils.timestampToString(oldestRevTimeStamp) ); - stats.ignoredGCDueToCheckPoint = true; - return stats; + phases.stats.ignoredGCDueToCheckPoint = true; + return phases.stats; } - collectDeletedDocuments(stats, headRevision, oldestRevTimeStamp); - collectSplitDocuments(stats, oldestRevTimeStamp); + collectDeletedDocuments(phases, headRevision, oldestRevTimeStamp); + collectSplitDocuments(phases, oldestRevTimeStamp); - sw.stop(); - stats.canceled = cancel.get(); - log.info("Revision garbage collection finished in {}. {}", sw, stats); - return stats; + phases.close(); + phases.stats.canceled = cancel.get(); + log.info("Revision garbage collection finished in {}. {}", phases.elapsed, phases.stats); + return phases.stats; } - private void collectSplitDocuments(VersionGCStats stats, long oldestRevTimeStamp) { - stats.collectAndDeleteSplitDocs.start(); - versionStore.deleteSplitDocuments(GC_TYPES, oldestRevTimeStamp, stats); - stats.collectAndDeleteSplitDocs.stop(); + private void collectSplitDocuments(GCPhases phases, long oldestRevTimeStamp) { + phases.start(GCPhase.SPLITS_CLEANUP); + versionStore.deleteSplitDocuments(GC_TYPES, oldestRevTimeStamp, phases.stats); + phases.stop(GCPhase.SPLITS_CLEANUP); } - private void collectDeletedDocuments(VersionGCStats stats, + private void collectDeletedDocuments(GCPhases phases, RevisionVector headRevision, long oldestRevTimeStamp) throws IOException { @@ -199,7 +272,7 @@ int docsTraversed = 0; DeletedDocsGC gc = new DeletedDocsGC(headRevision, cancel); try { - stats.collectDeletedDocs.start(); + phases.start(GCPhase.COLLECTING); Iterable itr = versionStore.getPossiblyDeletedDocs(oldestRevTimeStamp); try { for (NodeDocument doc : itr) { @@ -217,23 +290,32 @@ docsTraversed, gc.getNumDocuments()); } gc.possiblyDeleted(doc); + if (gc.hasLeafBatch()) { + phases.start(GCPhase.DELETING); + gc.removeLeafDocuments(phases.stats); + phases.stop(GCPhase.DELETING); + } } } finally { Utils.closeIfCloseable(itr); } - stats.collectDeletedDocs.stop(); + phases.stop(GCPhase.COLLECTING); if (gc.getNumDocuments() == 0){ return; } - stats.sortDocIds.start(); + phases.start(GCPhase.DELETING); + gc.removeLeafDocuments(phases.stats); + phases.stop(GCPhase.DELETING); + + phases.start(GCPhase.SORTING); gc.ensureSorted(); - stats.sortDocIds.stop(); + phases.stop(GCPhase.SORTING); - stats.deleteDeletedDocs.start(); - gc.removeDocuments(stats); - stats.deleteDeletedDocs.stop(); + phases.start(GCPhase.DELETING); + gc.removeDocuments(phases.stats); + phases.stop(GCPhase.DELETING); } finally { gc.close(); } @@ -247,6 +329,7 @@ private final RevisionVector headRevision; private final AtomicBoolean cancel; + private final List leafDocIdsToDelete = Lists.newArrayList(); private final StringSort docIdsToDelete = newStringSort(); private final StringSort prevDocIdsToDelete = newStringSort(); private final Set exclude = Sets.newHashSet(); @@ -264,7 +347,7 @@ * This number does not include the previous documents. */ long getNumDocuments() { - return docIdsToDelete.getSize(); + return docIdsToDelete.getSize() + leafDocIdsToDelete.size(); } /** @@ -289,9 +372,14 @@ return; } if (doc.getNodeAtRevision(nodeStore, headRevision, null) == null) { - addDocument(id); // Collect id of all previous docs also - addPreviousDocuments(previousDocIdsFor(doc)); + Iterator previousDocs = previousDocIdsFor(doc); + if (!doc.hasChildren() && !previousDocs.hasNext()) { + addLeafDocument(id); + } else { + addDocument(id); + addPreviousDocuments(previousDocs); + } } } @@ -304,11 +392,22 @@ * @param stats to track the number of removed documents. */ void removeDocuments(VersionGCStats stats) throws IOException { - stats.deletedDocGCCount += removeDeletedDocuments(); + removeLeafDocuments(stats); + stats.deletedDocGCCount += removeDeletedDocuments(getDocIdsToDelete(), "(other)"); // FIXME: this is incorrect because that method also removes intermediate docs stats.splitDocGCCount += removeDeletedPreviousDocuments(); } + boolean hasLeafBatch() { + return leafDocIdsToDelete.size() >= DELETE_BATCH_SIZE; + } + + void removeLeafDocuments(VersionGCStats stats) throws IOException { + int removeCount = removeDeletedDocuments(getLeafDocIdsToDelete(), "(leaf)"); + stats.deletedLeafDocGCCount += removeCount; + stats.deletedDocGCCount += removeCount; + } + public void close() { try { docIdsToDelete.close(); @@ -357,6 +456,10 @@ docIdsToDelete.add(id); } + private void addLeafDocument(String id) throws IOException { + leafDocIdsToDelete.add(id); + } + private long getNumPreviousDocuments() { return prevDocIdsToDelete.getSize() - exclude.size(); } @@ -372,6 +475,10 @@ return docIdsToDelete.getIds(); } + private Iterator getLeafDocIdsToDelete() throws IOException { + return leafDocIdsToDelete.iterator(); + } + private void concurrentModification(NodeDocument doc) { Iterator it = doc.getAllPreviousDocs(); while (it.hasNext()) { @@ -390,9 +497,8 @@ }); } - private int removeDeletedDocuments() throws IOException { - Iterator docIdsToDelete = getDocIdsToDelete(); - log.info("Proceeding to delete [{}] documents", getNumDocuments()); + private int removeDeletedDocuments(Iterator docIdsToDelete, String label) throws IOException { + log.info("Proceeding to delete [{}] documents [{}]", getNumDocuments(), label); Iterator> idListItr = partition(docIdsToDelete, DELETE_BATCH_SIZE); int deletedCount = 0; Index: oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java =================================================================== --- oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java (Revision 1781354) +++ oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java (Arbeitskopie) @@ -156,6 +156,7 @@ VersionGCStats stats = gc.gc(maxAge * 2, HOURS); assertEquals(noOfDocsToDelete * 2 + 1, stats.deletedDocGCCount); + assertEquals(noOfDocsToDelete, stats.deletedLeafDocGCCount); assertNull(ts.find(Collection.NODES, "1:/x")); @@ -204,6 +205,7 @@ VersionGCStats stats = gc.gc(maxAge * 2, HOURS); assertEquals(noOfDocsToDelete * 2 + 1, stats.deletedDocGCCount); + assertEquals(noOfDocsToDelete, stats.deletedLeafDocGCCount); } @Test @@ -340,6 +342,7 @@ VersionGarbageCollector gc = store.getVersionGarbageCollector(); VersionGCStats stats = gc.gc(30, MINUTES); assertEquals(90, stats.deletedDocGCCount); + assertEquals(90, stats.deletedLeafDocGCCount); queries.release(2); Index: oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java =================================================================== --- oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java (Revision 1781354) +++ oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java (Arbeitskopie) @@ -180,6 +180,7 @@ clock.waitUntil(clock.getTime() + delta); stats = gc.gc(maxAge*2, HOURS); assertEquals(0, stats.deletedDocGCCount); + assertEquals(0, stats.deletedLeafDocGCCount); //3. Check that deleted doc does get collected post maxAge clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) + delta); @@ -186,6 +187,7 @@ stats = gc.gc(maxAge*2, HOURS); assertEquals(1, stats.deletedDocGCCount); + assertEquals(1, stats.deletedLeafDocGCCount); //4. Check that a revived doc (deleted and created again) does not get gc NodeBuilder b3 = store.getRoot().builder(); @@ -199,6 +201,7 @@ clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) + delta); stats = gc.gc(maxAge*2, HOURS); assertEquals(0, stats.deletedDocGCCount); + assertEquals(0, stats.deletedLeafDocGCCount); } @@ -249,6 +252,7 @@ clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge) + delta); VersionGCStats stats = gc.gc(maxAge, HOURS); assertEquals(2, stats.splitDocGCCount); + assertEquals(0, stats.deletedLeafDocGCCount); //Previous doc should be removed assertNull(getDoc(previousDocTestFoo.get(0).getPath())); @@ -306,6 +310,7 @@ clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge) + delta); VersionGCStats stats = gc.gc(maxAge, HOURS); assertEquals(10, stats.splitDocGCCount); + assertEquals(0, stats.deletedLeafDocGCCount); DocumentNodeState test = getDoc("/test").getNodeAtRevision( store, store.getHeadRevision(), null); @@ -342,6 +347,7 @@ VersionGCStats stats = gc.gc(maxAge, HOURS); assertEquals(1, stats.deletedDocGCCount); + assertEquals(1, stats.deletedLeafDocGCCount); Set children = Sets.newHashSet(); for (ChildNodeEntry entry : store.getRoot().getChildNodeEntries()) { @@ -369,6 +375,7 @@ VersionGCStats stats = gc.gc(maxAge, HOURS); assertEquals(2, stats.splitDocGCCount); + assertEquals(0, stats.deletedLeafDocGCCount); NodeDocument doc = getDoc("/foo"); assertNotNull(doc); @@ -500,6 +507,7 @@ VersionGCStats stats = f.get(); assertEquals(1, stats.deletedDocGCCount); assertEquals(2, stats.splitDocGCCount); + assertEquals(0, stats.deletedLeafDocGCCount); } // OAK-4819 @@ -530,6 +538,7 @@ // gc must not fail VersionGCStats stats = gc.gc(maxAge, HOURS); assertEquals(1, stats.deletedDocGCCount); + assertEquals(1, stats.deletedLeafDocGCCount); } @Test