Index: oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java =================================================================== --- oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java (revision 1577153) +++ oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java (working copy) @@ -70,6 +70,12 @@ public static final int DEFAULT_BATCH_COUNT = 2048; + public static final String NOT_RUNNING = "NotRunning"; + + public static final String MARKING = "Running-Marking"; + + public static final String SWEEPING = "Running-Sweeping"; + /** The max last modified time of blobs to consider for garbage collection. */ private long maxLastModifiedTime; @@ -91,6 +97,12 @@ /** The batch count. */ private int batchCount = DEFAULT_BATCH_COUNT; + /** Flag to indicate whether to run in a debug mode **/ + private boolean debugMode = Boolean.getBoolean("debugModeGC") || LOG.isDebugEnabled(); + + /** Current state of the gc: NOT_RUNNING, MARKING or SWEEPING; volatile as it is presumably polled from other threads **/ + private volatile String state = NOT_RUNNING; + /** * Gets the max last modified time considered for garbage collection. * @@ -146,6 +158,15 @@ } /** + * Gets the state of the gc process. + * + * @return the state, one of NOT_RUNNING, MARKING or SWEEPING + */ + protected String getState() { + return state; + } + + /** * @param nodeStore the node store * @param root the root * @param batchCount the batch count @@ -206,6 +227,7 @@ LOG.debug("garbage collector finished"); } finally { fs.complete(); + state = NOT_RUNNING; } } @@ -216,6 +238,7 @@ * the exception */ protected void mark() throws Exception { + state = MARKING; LOG.debug("Starting mark phase of the garbage collector"); // Find all blobs available in the blob store @@ -287,6 +310,7 @@ * Signals that an I/O exception has occurred. 
*/ protected void sweep() throws IOException { + state = SWEEPING; LOG.debug("Starting sweep phase of the garbage collector"); ConcurrentLinkedQueue exceptionQueue = new ConcurrentLinkedQueue(); @@ -428,12 +452,21 @@ int referencesFound = 0; while (blobIterator.hasNext()) { Blob blob = blobIterator.next(); + + if (debugMode) { + LOG.debug("BlobId : " + blob.toString()); + } + + if (blob.toString().length() != 0) { Iterator<String> idIter = ((GarbageCollectableBlobStore) nodeStore .getBlobStore()) .resolveChunks(blob.toString()); while (idIter.hasNext()) { - referencedBlobs.add(idIter.next()); + String id = idIter.next(); + referencedBlobs.add(id); + if (debugMode) { + LOG.debug("chunkId : " + id); + } } } Index: oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/BlobReferenceIterator.java =================================================================== --- oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/BlobReferenceIterator.java (revision 1577153) +++ oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/BlobReferenceIterator.java (working copy) @@ -95,7 +95,10 @@ private boolean loadBatchQuery() { // read about BATCH_SIZE documents - List<NodeDocument> list = docStore.query(Collection.NODES, fromKey, "999999", BATCH_SIZE); + List<NodeDocument> list = + docStore.query(Collection.NODES, fromKey, ";", NodeDocument.HAS_BINARY_FLAG, + NodeDocument.HAS_BINARY_VAL, + BATCH_SIZE); boolean hasMore = false; for (NodeDocument doc : list) { if (doc.getId().equals(fromKey)) { Index: oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/Commit.java =================================================================== --- oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/Commit.java (revision 1577153) +++ oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/Commit.java (working copy) @@ -62,7 +62,10 @@ private HashSet addedNodes = new HashSet(); private HashSet removedNodes = new HashSet(); - + + /** Set of all nodes which have 
binary properties. **/ + private HashSet<String> nodesWithBinaries = Sets.newHashSet(); + Commit(DocumentNodeStore nodeStore, Revision baseRevision, Revision revision) { this.baseRevision = baseRevision; this.revision = revision; @@ -123,6 +126,10 @@ op.setMapEntry(key, revision, value); } + void markNodeHavingBinary(String path) { + this.nodesWithBinaries.add(path); + } + void addNode(DocumentNodeState n) { String path = n.getPath(); if (operations.containsKey(path)) { @@ -185,6 +192,7 @@ private void applyInternal() { if (!operations.isEmpty()) { updateParentChildStatus(); + updateBinaryStatus(); applyToDocumentStore(); } } @@ -192,11 +200,28 @@ private void prepare(Revision baseRevision) { if (!operations.isEmpty()) { updateParentChildStatus(); + updateBinaryStatus(); applyToDocumentStore(baseRevision); } } /** + * Sets the has-binary flag on the update op of every node marked as having binaries, unless its cached document already carries it. + */ + private void updateBinaryStatus() { + DocumentStore store = this.nodeStore.getDocumentStore(); + + for (String path : this.nodesWithBinaries) { + NodeDocument nd = + (NodeDocument) store.getIfCached(Collection.NODES, Utils.getIdFromPath(path)); + if ((nd == null) || (nd.hasBinary() != NodeDocument.HAS_BINARY_VAL)) { + UpdateOp op = getUpdateOperationForNode(path); + NodeDocument.setHasBinary(op); + } + } + } + + /** * Apply the changes to the document store. 
*/ void applyToDocumentStore() { Index: oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/CommitDiff.java =================================================================== --- oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/CommitDiff.java (revision 1577153) +++ oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/CommitDiff.java (working copy) @@ -20,6 +20,7 @@ import org.apache.jackrabbit.oak.commons.json.JsopBuilder; import org.apache.jackrabbit.oak.api.PropertyState; +import org.apache.jackrabbit.oak.api.Type; import org.apache.jackrabbit.oak.commons.PathUtils; import org.apache.jackrabbit.oak.kernel.BlobSerializer; import org.apache.jackrabbit.oak.kernel.JsonSerializer; @@ -111,5 +112,9 @@ JsonSerializer serializer = new JsonSerializer(builder, blobs); serializer.serialize(property); commit.updateProperty(path, property.getName(), serializer.toString()); + if ((property.getType() == Type.BINARY) + || (property.getType() == Type.BINARIES)) { + this.commit.markNodeHavingBinary(this.path); + } } } Index: oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoDocumentStore.java =================================================================== --- oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoDocumentStore.java (revision 1577153) +++ oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoDocumentStore.java (working copy) @@ -125,6 +125,14 @@ options.put("unique", Boolean.FALSE); nodes.ensureIndex(index, options); + // index on the _bin flag for faster access to nodes with binaries during GC + index = new BasicDBObject(); + index.put(NodeDocument.HAS_BINARY_FLAG, Integer.valueOf(1)); + options = new BasicDBObject(); + options.put("unique", Boolean.FALSE); + options.put("sparse", Boolean.TRUE); + this.nodes.ensureIndex(index, options); + // TODO expire entries if the parent was changed if (builder.useOffHeapCache()) { nodesCache = 
createOffHeapCache(builder); Index: oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java =================================================================== --- oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java (revision 1577153) +++ oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java (working copy) @@ -164,12 +164,16 @@ */ public static final String PATH = "_path"; + public static final String HAS_BINARY_FLAG = "_bin"; + /** * Properties to ignore when a document is split. */ private static final Set IGNORE_ON_SPLIT = ImmutableSet.of(ID, MOD_COUNT, MODIFIED, PREVIOUS, - LAST_REV, CHILDREN_FLAG); + LAST_REV, CHILDREN_FLAG, HAS_BINARY_FLAG); + public static final int HAS_BINARY_VAL = 1; + final DocumentStore store; /** @@ -268,6 +272,11 @@ return lastCheckTime.get(); } + public int hasBinary() { + Integer flag = (Integer) get(HAS_BINARY_FLAG); + return flag != null ? flag.intValue() : 0; + } + /** * @return a map of the last known revision for each clusterId. */ @@ -942,6 +951,10 @@ checkNotNull(low).toString()); } + public static void setHasBinary(@Nonnull UpdateOp op) { + checkNotNull(op).set(HAS_BINARY_FLAG, HAS_BINARY_VAL); + } + //----------------------------< internal >---------------------------------- /**