### Eclipse Workspace Patch 1.0
#P oak-core
Index: src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentIdTable.java
===================================================================
--- src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentIdTable.java (revision 1672555)
+++ src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentIdTable.java (working copy)
@@ -23,6 +23,7 @@
import java.lang.ref.WeakReference;
import java.util.ArrayList;
import java.util.Collection;
+import java.util.List;
import java.util.Map;
import org.apache.jackrabbit.oak.plugins.segment.compaction.CompactionStrategy;
@@ -33,23 +34,39 @@
public class SegmentIdTable {
/**
- * Hash table of weak references to segment identifiers that are currently
- * being accessed. The size of the table is always a power of two, which
- * optimizes the {@link #expand()} operation. The table is indexed by the
- * random identifier bits, which guarantees uniform distribution of entries.
- * Each table entry is either {@code null} (when there are no matching
- * identifiers) or a list of weak references to the matching identifiers.
+ * The list of weak references to segment identifiers that are currently
+ * being accessed. This represents a hash table that uses open addressing
+ * with linear probing. It is not a hash map, to speed up read access.
+ *
+ * The size of the table is always a power of two, so that we can use
+ * bitwise "and" instead of modulo.
+ *
+ * The table is indexed by the random identifier bits, which guarantees
+ * uniform distribution of entries.
*
- * Actually, this is a array. It's not a hash map, to conserve memory (maps
- * need much more memory).
+ * Open addressing with linear probing is used. Each table entry is either
+ * null (when there are no matching identifiers), a weak reference to the
+ * matching identifier, or a weak reference to another identifier.
+ * There are no tombstone entries as there is no explicit remove operation,
+ * but a referent can become null if the entry is garbage collected.
*
- * The list is not sorted (we could; lookup would be faster, but adding and
- * removing entries would be slower).
+ * The array is not sorted (we could; lookup might be faster, but adding
+ * entries would be slower).
*/
private final ArrayList<WeakReference<SegmentId>> references =
newArrayList(nCopies(1024, (WeakReference<SegmentId>) null));
private final SegmentTracker tracker;
+
+ /**
+ * The number of times the table was rebuilt (for diagnostics and testing).
+ */
+ private int rebuildCount;
+
+ /**
+ * The number of used entries (WeakReferences) in this table.
+ */
+ private int entryCount;
SegmentIdTable(SegmentTracker tracker) {
this.tracker = tracker;
@@ -75,13 +92,20 @@
&& id.getLeastSignificantBits() == lsb) {
return id;
}
+ // set shouldRefresh if we encountered a garbage collected entry
shouldRefresh = shouldRefresh || id == null;
+ // open addressing / linear probing
index = (index + 1) % references.size();
reference = references.get(index);
}
SegmentId id = new SegmentId(tracker, msb, lsb);
references.set(index, new WeakReference<SegmentId>(id));
+ entryCount++;
+ if (entryCount > references.size() * 0.75) {
+ // more than 75% full
+ shouldRefresh = true;
+ }
if (shouldRefresh) {
refresh();
}
@@ -91,7 +115,7 @@
/**
* Returns all segment identifiers that are currently referenced in memory.
*
- * @return referenced segment identifiers
+ * @param ids referenced segment identifiers
*/
void collectReferencedIds(Collection<SegmentId> ids) {
ids.addAll(refresh());
@@ -113,16 +137,27 @@
hashCollisions = hashCollisions || (i != getIndex(id));
} else {
references.set(i, null);
+ entryCount--;
emptyReferences = true;
}
}
}
+
+ // even though the entry count should already be correct,
+ // we set it again for safety, because a wrong entry count
+ // would be very problematic
+ entryCount = ids.size();
while (2 * ids.size() > size) {
size *= 2;
}
+ // we need to re-build the table if the new size is different,
+ // but also if we removed some of the entries (because an entry was
+ // garbage collected) and there is at least one entry at the "wrong"
+ // location (due to open addressing)
if ((hashCollisions && emptyReferences) || size != references.size()) {
+ rebuildCount++;
references.clear();
references.addAll(nCopies(size, (WeakReference<SegmentId>) null));
@@ -157,7 +192,6 @@
if (id != null) {
if (strategy.canRemove(id)) {
reference.clear();
- references.set(i, null);
dirty = true;
}
}
@@ -167,4 +201,50 @@
refresh();
}
}
+
+ /**
+ * Get the number of map rebuild operations (used for testing and diagnostics).
+ *
+ * @return the rebuild count
+ */
+ int getMapRebuildCount() {
+ return rebuildCount;
+ }
+
+ /**
+ * Get the entry count (used for testing and diagnostics).
+ *
+ * @return the entry count
+ */
+ int getEntryCount() {
+ return entryCount;
+ }
+
+ /**
+ * Get the size of the internal map (used for testing and diagnostics).
+ *
+ * @return the map size
+ */
+ int getMapSize() {
+ return references.size();
+ }
+
+ /**
+ * Get the raw list of segment ids (used for testing).
+ *
+ * @return the raw list
+ */
+ List<SegmentId> getRawSegmentIdList() {
+ ArrayList<SegmentId> list = new ArrayList<SegmentId>();
+ for (WeakReference<SegmentId> ref : references) {
+ if (ref != null) {
+ SegmentId id = ref.get();
+ if (id != null) {
+ list.add(id);
+ }
+ }
+ }
+ return list;
+ }
+
}
Index: src/test/java/org/apache/jackrabbit/oak/plugins/segment/SegmentIdTableTest.java
===================================================================
--- src/test/java/org/apache/jackrabbit/oak/plugins/segment/SegmentIdTableTest.java (revision 0)
+++ src/test/java/org/apache/jackrabbit/oak/plugins/segment/SegmentIdTableTest.java (working copy)
@@ -0,0 +1,214 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.segment;
+
+import static org.junit.Assert.fail;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Random;
+import java.util.concurrent.Callable;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.TimeUnit;
+
+import javax.annotation.Nonnull;
+
+import junit.framework.Assert;
+
+import org.apache.jackrabbit.oak.plugins.segment.compaction.CompactionStrategy;
+import org.apache.jackrabbit.oak.plugins.segment.compaction.CompactionStrategy.CleanupType;
+import org.apache.jackrabbit.oak.plugins.segment.memory.MemoryStore;
+import org.junit.Test;
+
+public class SegmentIdTableTest {
+
+ /**
+ * OAK-2752
+ */
+ @Test
+ public void endlessSearchLoop() {
+ SegmentTracker tracker = new MemoryStore().getTracker();
+ final SegmentIdTable tbl = new SegmentIdTable(tracker);
+
+ List<SegmentId> refs = new ArrayList<SegmentId>();
+ for (int i = 0; i < 1024; i++) {
+ refs.add(tbl.getSegmentId(i, i % 64));
+ }
+
+ Callable<SegmentId> c = new Callable<SegmentId>() {
+
+ @Override
+ public SegmentId call() throws Exception {
+ // (2,1) doesn't exist
+ return tbl.getSegmentId(2, 1);
+ }
+ };
+ Future<SegmentId> f = Executors.newSingleThreadExecutor().submit(c);
+ SegmentId s = null;
+ try {
+ s = f.get(5, TimeUnit.SECONDS);
+ } catch (Exception e) {
+ Assert.fail(e.getMessage());
+ }
+ Assert.assertNotNull(s);
+ Assert.assertEquals(2, s.getMostSignificantBits());
+ Assert.assertEquals(1, s.getLeastSignificantBits());
+ }
+
+ @Test
+ public void randomized() {
+ SegmentTracker tracker = new MemoryStore().getTracker();
+ final SegmentIdTable tbl = new SegmentIdTable(tracker);
+
+ List<SegmentId> refs = new ArrayList<SegmentId>();
+ Random r = new Random(1);
+ for (int i = 0; i < 16 * 1024; i++) {
+ refs.add(tbl.getSegmentId(r.nextLong(), r.nextLong()));
+ }
+ Assert.assertEquals(16 * 1024, tbl.getEntryCount());
+ Assert.assertEquals(16 * 2048, tbl.getMapSize());
+ Assert.assertEquals(5, tbl.getMapRebuildCount());
+
+ r = new Random(1);
+ for (int i = 0; i < 16 * 1024; i++) {
+ refs.add(tbl.getSegmentId(r.nextLong(), r.nextLong()));
+ Assert.assertEquals(16 * 1024, tbl.getEntryCount());
+ Assert.assertEquals(16 * 2048, tbl.getMapSize());
+ Assert.assertEquals(5, tbl.getMapRebuildCount());
+ }
+ }
+
+ @Test
+ public void clearTable() {
+ SegmentTracker tracker = new MemoryStore().getTracker();
+ final SegmentIdTable tbl = new SegmentIdTable(tracker);
+
+ List<SegmentId> refs = new ArrayList<SegmentId>();
+ int originalCount = 8;
+ for (int i = 0; i < originalCount; i++) {
+ refs.add(tbl.getSegmentId(i, i % 2));
+ }
+ Assert.assertEquals(originalCount, tbl.getEntryCount());
+ Assert.assertEquals(0, tbl.getMapRebuildCount());
+
+ tbl.clearSegmentIdTables(new CompactionStrategy(false, false,
+ CleanupType.CLEAN_NONE, originalCount, (byte) 0) {
+
+ @Override
+ public boolean compacted(@Nonnull Callable<Boolean> setHead)
+ throws Exception {
+ return true;
+ }
+
+ @Override
+ public boolean canRemove(SegmentId id) {
+ return id.getMostSignificantBits() < 4;
+ }
+
+ });
+
+ Assert.assertEquals(4, tbl.getEntryCount());
+
+ for (SegmentId id : refs) {
+ if (id.getMostSignificantBits() >= 4) {
+ SegmentId id2 = tbl.getSegmentId(
+ id.getMostSignificantBits(),
+ id.getLeastSignificantBits());
+ List<SegmentId> list = tbl.getRawSegmentIdList();
+ if (list.size() != new HashSet<SegmentId>(list).size()) {
+ Collections.sort(list);
+ fail("duplicate entry " + list.toString());
+ }
+ Assert.assertTrue(id == id2);
+ }
+ }
+ }
+
+ @Test
+ public void justHashCollisions() {
+ SegmentTracker tracker = new MemoryStore().getTracker();
+ final SegmentIdTable tbl = new SegmentIdTable(tracker);
+
+ List<SegmentId> refs = new ArrayList<SegmentId>();
+ int originalCount = 1024;
+ for (int i = 0; i < originalCount; i++) {
+ // modulo 128 to ensure we have conflicts
+ refs.add(tbl.getSegmentId(i, i % 128));
+ }
+ Assert.assertEquals(originalCount, tbl.getEntryCount());
+ Assert.assertEquals(1, tbl.getMapRebuildCount());
+
+ List<SegmentId> refs2 = new ArrayList<SegmentId>();
+ tbl.collectReferencedIds(refs2);
+ Assert.assertEquals(refs.size(), refs2.size());
+
+ Assert.assertEquals(originalCount, tbl.getEntryCount());
+ // we don't expect that there was a refresh,
+ // because there were just hash collisions
+ Assert.assertEquals(1, tbl.getMapRebuildCount());
+ }
+
+ @Test
+ public void gc() {
+ SegmentTracker tracker = new MemoryStore().getTracker();
+ final SegmentIdTable tbl = new SegmentIdTable(tracker);
+
+ List<SegmentId> refs = new ArrayList<SegmentId>();
+ int originalCount = 1024;
+ for (int i = 0; i < originalCount; i++) {
+ // modulo 128 to ensure we have conflicts
+ refs.add(tbl.getSegmentId(i, i % 128));
+ }
+ Assert.assertEquals(originalCount, tbl.getEntryCount());
+ Assert.assertEquals(1, tbl.getMapRebuildCount());
+
+ for (int i = 0; i < refs.size() / 2; i++) {
+ // we need to remove the first entries,
+ // because if we remove the last entries, then
+ // getSegmentId would not detect that entries were freed up
+ refs.remove(0);
+ }
+ for (int gcCalls = 0;; gcCalls++) {
+ // needed here, so some entries can be garbage collected
+ System.gc();
+
+ for (SegmentId id : refs) {
+ SegmentId id2 = tbl.getSegmentId(id.getMostSignificantBits(), id.getLeastSignificantBits());
+ Assert.assertTrue(id2 == id);
+ }
+ // because we found each entry, we expect the rebuild count is the same
+ Assert.assertEquals(1, tbl.getMapRebuildCount());
+
+ // even though this does not increase the entry count a lot,
+ // it is supposed to detect that entries were removed,
+ // and force a refresh, which would get rid of the unreferenced ids
+ for (int i = 0; i < 10; i++) {
+ tbl.getSegmentId(i, i);
+ }
+
+ if (tbl.getEntryCount() < originalCount) {
+ break;
+ } else if (gcCalls > 10) {
+ fail("No entries were garbage collected after 10 times System.gc()");
+ }
+ }
+ Assert.assertEquals(2, tbl.getMapRebuildCount());
+ }
+}
\ No newline at end of file