Index: src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java =================================================================== --- src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java (revision 1599138) +++ src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java (working copy) @@ -17,16 +17,13 @@ package org.apache.jackrabbit.oak.plugins.blob; import java.io.BufferedWriter; +import java.io.Closeable; import java.io.File; import java.io.FileWriter; import java.io.IOException; import java.sql.Timestamp; -import java.util.ArrayDeque; import java.util.Iterator; import java.util.List; -import java.util.NoSuchElementException; -import java.util.Set; -import java.util.TreeSet; import java.util.concurrent.Callable; import java.util.concurrent.ConcurrentLinkedQueue; import java.util.concurrent.ExecutionException; @@ -38,8 +35,10 @@ import com.google.common.base.Joiner; import com.google.common.base.StandardSystemProperty; import com.google.common.base.Stopwatch; +import com.google.common.collect.AbstractIterator; +import com.google.common.collect.Iterators; import com.google.common.collect.Lists; -import com.google.common.collect.Sets; +import com.google.common.collect.PeekingIterator; import com.google.common.io.Closeables; import com.google.common.io.Files; import com.google.common.util.concurrent.ListenableFutureTask; @@ -222,7 +221,7 @@ FileLineDifferenceIterator iter = new FileLineDifferenceIterator( fs.getMarkedRefs(), - fs.getAvailableRefs(), batchCount); + fs.getAvailableRefs()); BufferedWriter bufferWriter = null; try { @@ -245,6 +244,7 @@ LOG.debug("Found GC candidates - " + numCandidates); } finally { IOUtils.closeQuietly(bufferWriter); + IOUtils.closeQuietly(iter); } LOG.debug("Ending difference phase of the garbage collector"); @@ -449,138 +449,74 @@ } + /** * FileLineDifferenceIterator class which iterates over the difference of 2 files line by line. */ - static class FileLineDifferenceIterator implements Iterator { - - /** The marked references iterator. */ - private final LineIterator markedIter; - - /** The available references iter. */ - private final LineIterator allIter; - - private final ArrayDeque queue; - - private final int batchSize; - - private boolean done; - - /** Temporary buffer. */ - private TreeSet markedBuffer; + static class FileLineDifferenceIterator extends AbstractIterator implements Closeable{ + private final PeekingIterator peekMarked; + private final LineIterator marked; + private final LineIterator all; - /** - * Instantiates a new file line difference iterator. - */ - public FileLineDifferenceIterator(File marked, File available, int batchSize) throws IOException { - this.markedIter = FileUtils.lineIterator(marked); - this.allIter = FileUtils.lineIterator(available); - this.batchSize = batchSize; - queue = new ArrayDeque(batchSize); - markedBuffer = Sets.newTreeSet(); - + public FileLineDifferenceIterator(File marked, File available) throws IOException { + this(FileUtils.lineIterator(marked), FileUtils.lineIterator(available)); } - /** - * Close. - */ - private void close() { - LineIterator.closeQuietly(markedIter); - LineIterator.closeQuietly(allIter); + public FileLineDifferenceIterator(LineIterator marked, LineIterator available) throws IOException { + this.marked = marked; + this.peekMarked = Iterators.peekingIterator(marked); + this.all = available; } @Override - public boolean hasNext() { - if (!queue.isEmpty()) { - return true; - } else if (done) { - return false; - } else { - if (!markedIter.hasNext() && !allIter.hasNext()) { - done = true; - close(); - return false; - } else { - queue.addAll(difference()); - if (!queue.isEmpty()) { - return true; - } else { - done = true; - close(); - } - } + protected String computeNext() { + String diff = computeNextDiff(); + if (diff == null) { + close(); + return endOfData(); } - - return false; + return diff; } @Override - public String next() { - return nextDifference(); + public void close() { + LineIterator.closeQuietly(marked); + LineIterator.closeQuietly(all); } - /** - * Next difference. - * - * @return the string - */ - public String nextDifference() { - if (!hasNext()) { - throw new NoSuchElementException("No more difference"); + private String computeNextDiff() { + if (!all.hasNext()) { + return null; } - return queue.remove(); - } - - /** - * Difference. - * - * @return the sets the - */ - protected Set difference() { - TreeSet gcSet = new TreeSet(); - - // Iterate till the gc candidate set is at least SAVE_BATCH_COUNT or - // the - // blob id set iteration is complete - while (allIter.hasNext() && - gcSet.size() < batchSize) { - TreeSet allBuffer = new TreeSet(); - - while (markedIter.hasNext() && - markedBuffer.size() < batchSize) { - String stre = markedIter.next(); - markedBuffer.add(stre); - } - while (allIter.hasNext() && - allBuffer.size() < batchSize) { - String stre = allIter.next(); - allBuffer.add(stre); - } - - if (markedBuffer.isEmpty()) { - gcSet = allBuffer; - } else { - gcSet.addAll( - Sets.difference(allBuffer, markedBuffer)); - if (allBuffer.last().compareTo(markedBuffer.last()) < 0) { - // filling markedLeftoverBuffer - TreeSet markedLeftoverBuffer = Sets.newTreeSet(); - markedLeftoverBuffer.addAll(markedBuffer.tailSet(allBuffer.last(), false)); - markedBuffer = markedLeftoverBuffer; - markedLeftoverBuffer = null; + //Marked finish the rest of all are part of diff + if (!peekMarked.hasNext()) { + return all.next(); + } + + String diff = null; + while (all.hasNext() && diff == null) { + diff = all.next(); + while (peekMarked.hasNext()) { + String marked = peekMarked.peek(); + int comparisonResult = diff.compareTo(marked); + if (comparisonResult > 0) { + //Extra entries in marked. Ignore them and move on + peekMarked.next(); + } else if (comparisonResult == 0) { + //Matching entry found in marked move past it. Not a + //dif candidate + peekMarked.next(); + diff = null; + break; } else { - markedBuffer.clear(); + //This entry is not found in marked entries + //hence part of diff + return diff; } } } - - return gcSet; - } - - @Override - public void remove() { - throw new UnsupportedOperationException(); + return diff; } } Index: src/test/java/org/apache/jackrabbit/oak/plugins/blob/FileLineDifferenceIteratorTest.java =================================================================== --- src/test/java/org/apache/jackrabbit/oak/plugins/blob/FileLineDifferenceIteratorTest.java (revision 0) +++ src/test/java/org/apache/jackrabbit/oak/plugins/blob/FileLineDifferenceIteratorTest.java (working copy) @@ -0,0 +1,108 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.jackrabbit.oak.plugins.blob; + +import java.io.IOException; +import java.io.StringReader; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.Random; +import java.util.TreeSet; + +import com.google.common.base.Joiner; +import com.google.common.base.Splitter; +import com.google.common.base.StandardSystemProperty; +import com.google.common.collect.ImmutableList; +import org.apache.commons.io.LineIterator; +import org.junit.Test; + +import static java.util.Arrays.asList; +import static org.apache.jackrabbit.oak.plugins.blob.MarkSweepGarbageCollector.FileLineDifferenceIterator; +import static org.hamcrest.CoreMatchers.is; +import static org.junit.Assert.assertThat; + +public class FileLineDifferenceIteratorTest { + + @Test + public void testRandomized() throws Exception { + Random r = new Random(0); + for (int i = 0; i < 10000; i++) { + TreeSet marked = new TreeSet(); + TreeSet all = new TreeSet(); + TreeSet diff = new TreeSet(); + int size = r.nextInt(5); + for (int a = 0; a < size; a++) { + marked.add("" + r.nextInt(10)); + } + size = r.nextInt(5); + for (int a = 0; a < size; a++) { + all.add("" + r.nextInt(10)); + } + diff.addAll(all); + diff.removeAll(marked); + String m = marked.toString().replaceAll("[ \\[\\]]", ""); + String a = all.toString().replaceAll("[ \\[\\]]", ""); + assertDiff(m, a, + new ArrayList(diff)); + } + } + + @Test + public void testNoDiff() throws Exception { + assertDiff("a,b,c", "a,b,c", Collections. emptyList()); + assertDiff("a,b,c,d,f", "a,b,f", Collections. emptyList()); + } + + @Test + public void testSimpleDiff() throws Exception { + assertDiff("a,b", "a,b,c", asList("c")); + assertDiff("a,b", "", Collections. emptyList()); + assertDiff("", "", Collections. emptyList()); + assertDiff("", "a", asList("a")); + assertDiff("", "a, b", asList("a", "b")); + } + + @Test + public void testDiffWithExtraEntriesInMarked() throws IOException { + assertDiff("a,b", "a,b,c, e, h", asList("c", "e", "h")); + assertDiff("a,b,d,e", "a,b,c", asList("c")); + assertDiff("a,b,d,e,f", "a,b,c,f", asList("c")); + assertDiff("a,b,d,e,f", "a,b,c,f, h", asList("c", "h")); + assertDiff("3,7", "2,3,5,9", asList("2", "5", "9")); + } + + private static void assertDiff(String marked, String all, List diff) throws IOException { + Iterator itr = createItr(marked, all); + assertThat("marked: " + marked + " all: " + all, ImmutableList.copyOf(itr), is(diff)); + } + + private static Iterator createItr(String marked, String all) throws IOException { + return new FileLineDifferenceIterator(lineItr(marked), lineItr(all)); + } + + private static LineIterator lineItr(String seq) { + Iterable seqItr = Splitter.on(',').trimResults().split(seq); + String lines = Joiner.on(StandardSystemProperty.LINE_SEPARATOR.value()).join(seqItr); + return new LineIterator(new StringReader(lines)); + } + +} Index: src/test/java/org/apache/jackrabbit/oak/plugins/document/MongoBlobGCTest.java =================================================================== --- src/test/java/org/apache/jackrabbit/oak/plugins/document/MongoBlobGCTest.java (revision 1599138) +++ src/test/java/org/apache/jackrabbit/oak/plugins/document/MongoBlobGCTest.java (working copy) @@ -16,10 +16,7 @@ */ package org.apache.jackrabbit.oak.plugins.document; -import static org.junit.Assert.assertTrue; - import java.io.ByteArrayInputStream; -import java.io.IOException; import java.io.InputStream; import java.util.HashSet; import java.util.Iterator; @@ -28,14 +25,12 @@ import java.util.Set; import java.util.concurrent.TimeUnit; -import com.google.common.util.concurrent.MoreExecutors; -import junit.framework.Assert; - import com.google.common.collect.Lists; import com.google.common.collect.Sets; +import com.google.common.util.concurrent.MoreExecutors; import com.mongodb.BasicDBObject; import com.mongodb.DBCollection; - +import junit.framework.Assert; import org.apache.jackrabbit.oak.api.Blob; import org.apache.jackrabbit.oak.plugins.blob.MarkSweepGarbageCollector; import org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.VersionGCStats; @@ -46,6 +41,8 @@ import org.apache.jackrabbit.oak.stats.Clock; import org.junit.Test; +import static org.junit.Assert.assertTrue; + /** * Tests for MongoMK GC */ @@ -61,7 +58,7 @@ int number = 10; // track the number of the assets to be deleted List processed = Lists.newArrayList(); - Random rand = new Random(); + Random rand = new Random(47); for (int i = 0; i < 5; i++) { int n = rand.nextInt(number); if (!processed.contains(n)) { @@ -104,6 +101,18 @@ return set; } + public HashSet addInlined() throws Exception { + HashSet set = new HashSet(); + DocumentNodeStore s = mk.getNodeStore(); + NodeBuilder a = s.getRoot().builder(); + int number = 12; + for (int i = 0; i < number; i++) { + Blob b = s.createBlob(randomStream(i, 50)); + a.child("cinline" + i).setProperty("x", b); + } + s.merge(a, EmptyHook.INSTANCE, CommitInfo.EMPTY); + return set; + } private void deleteFromMongo(String nodeId) { DBCollection coll = mongoConnection.getDB().getCollection("nodes"); BasicDBObject blobNodeObj = new BasicDBObject(); @@ -123,18 +132,30 @@ gc(set); } + @Test + public void gcDirectMongoDeleteWithInlined() throws Exception { + HashSet set = setUp(true); + addInlined(); + gc(set); + } + @Test + public void gcVersionDeleteWithInlined() throws Exception { + HashSet set = setUp(false); + addInlined(); + gc(set); + } private void gc(HashSet set) throws Exception { DocumentNodeStore store = mk.getNodeStore(); MarkSweepGarbageCollector gc = new MarkSweepGarbageCollector( new DocumentBlobReferenceRetriever(store), (GarbageCollectableBlobStore) store.getBlobStore(), MoreExecutors.sameThreadExecutor(), - "./target", 2048, true, 0); + "./target", 5, true, 0); gc.collectGarbage(); Set existing = iterate(); boolean empty = Sets.intersection(set, existing).isEmpty(); - assertTrue(empty); + assertTrue(empty && !existing.isEmpty()); } protected Set iterate() throws Exception {