Index: oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java =================================================================== --- oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java (revision 1598486) +++ oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java (working copy) @@ -466,18 +466,21 @@ private boolean done; - /** Temporary buffer. */ + /** Temporary buffers. */ private TreeSet markedBuffer; + private TreeSet allBuffer; /** * Instantiates a new file line difference iterator. */ - public FileLineDifferenceIterator(File marked, File available, int batchSize) throws IOException { + public FileLineDifferenceIterator(File marked, File available, int batchSize) + throws IOException { this.markedIter = FileUtils.lineIterator(marked); this.allIter = FileUtils.lineIterator(available); this.batchSize = batchSize; queue = new ArrayDeque(batchSize); markedBuffer = Sets.newTreeSet(); + allBuffer = Sets.newTreeSet(); } @@ -540,11 +543,9 @@ TreeSet gcSet = new TreeSet(); // Iterate till the gc candidate set is at least SAVE_BATCH_COUNT or - // the - // blob id set iteration is complete - while (allIter.hasNext() && + // the blob id set iteration is complete + while ((allIter.hasNext() || markedIter.hasNext()) && gcSet.size() < batchSize) { - TreeSet allBuffer = new TreeSet(); while (markedIter.hasNext() && markedBuffer.size() < batchSize) { @@ -551,8 +552,9 @@ String stre = markedIter.next(); markedBuffer.add(stre); } - while (allIter.hasNext() && - allBuffer.size() < batchSize) { + + while (allIter.hasNext() + && allBuffer.size() < batchSize) { String stre = allIter.next(); allBuffer.add(stre); } @@ -560,21 +562,30 @@ if (markedBuffer.isEmpty()) { gcSet = allBuffer; } else { + TreeSet allLeftoverBuffer = Sets.newTreeSet(); + TreeSet markedLeftoverBuffer = Sets.newTreeSet(); + + /* Remove & track elements in allBuffer > max seen in markedBuffer Or */ + /* Track elements in markedBuffer > max seen in allBuffer */ + if (allBuffer.last().compareTo(markedBuffer.last()) > 0) { + allLeftoverBuffer.addAll(allBuffer.tailSet(markedBuffer.last(), false)); + allBuffer.removeAll(allLeftoverBuffer); + } else if (allBuffer.last().compareTo(markedBuffer.last()) < 0) { + markedLeftoverBuffer.addAll(markedBuffer.tailSet(allBuffer.last(), false)); + } + gcSet.addAll( Sets.difference(allBuffer, markedBuffer)); - if (allBuffer.last().compareTo(markedBuffer.last()) < 0) { - // filling markedLeftoverBuffer - TreeSet markedLeftoverBuffer = Sets.newTreeSet(); - markedLeftoverBuffer.addAll(markedBuffer.tailSet(allBuffer.last(), false)); - markedBuffer = markedLeftoverBuffer; - markedLeftoverBuffer = null; - } else { - markedBuffer.clear(); - } + markedBuffer = markedLeftoverBuffer; + allBuffer = allLeftoverBuffer; } } - + // Add any potential leftover blobs to be gc'ed. + if (!allBuffer.isEmpty()) { + gcSet.addAll(allBuffer); + allBuffer.clear(); + } return gcSet; } @@ -583,7 +594,7 @@ throw new UnsupportedOperationException(); } } - + /** * Provides a readable string for given timestamp */ Index: oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/MongoBlobGCTest.java =================================================================== --- oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/MongoBlobGCTest.java (revision 1598486) +++ oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/MongoBlobGCTest.java (working copy) @@ -19,7 +19,6 @@ import static org.junit.Assert.assertTrue; import java.io.ByteArrayInputStream; -import java.io.IOException; import java.io.InputStream; import java.util.HashSet; import java.util.Iterator; @@ -61,7 +60,7 @@ int number = 10; // track the number of the assets to be deleted List processed = Lists.newArrayList(); - Random rand = new Random(); + Random rand = new Random(47); for (int i = 0; i < 5; i++) { int n = rand.nextInt(number); if (!processed.contains(n)) { @@ -104,6 +103,18 @@ return set; } + public HashSet addInlined() throws Exception { + HashSet set = new HashSet(); + DocumentNodeStore s = mk.getNodeStore(); + NodeBuilder a = s.getRoot().builder(); + int number = 12; + for (int i = 0; i < number; i++) { + Blob b = s.createBlob(randomStream(i, 50)); + a.child("cinline" + i).setProperty("x", b); + } + s.merge(a, EmptyHook.INSTANCE, CommitInfo.EMPTY); + return set; + } private void deleteFromMongo(String nodeId) { DBCollection coll = mongoConnection.getDB().getCollection("nodes"); BasicDBObject blobNodeObj = new BasicDBObject(); @@ -123,6 +134,18 @@ gc(set); } + @Test + public void gcDirectMongoDeleteWithInlined() throws Exception { + HashSet set = setUp(true); + addInlined(); + gc(set); + } + @Test + public void gcVersionDeleteWithInlined() throws Exception { + HashSet set = setUp(false); + addInlined(); + gc(set); + } private void gc(HashSet set) throws Exception { DocumentNodeStore store = mk.getNodeStore(); MarkSweepGarbageCollector gc = new MarkSweepGarbageCollector( @@ -129,12 +152,12 @@ new DocumentBlobReferenceRetriever(store), (GarbageCollectableBlobStore) store.getBlobStore(), MoreExecutors.sameThreadExecutor(), - "./target", 2048, true, 0); + "./target", 5, true, 0); gc.collectGarbage(); Set existing = iterate(); boolean empty = Sets.intersection(set, existing).isEmpty(); - assertTrue(empty); + assertTrue(empty && !existing.isEmpty()); } protected Set iterate() throws Exception {