Index: src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentIdTable.java
===================================================================
--- src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentIdTable.java (revision 1672555)
+++ src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentIdTable.java (working copy)
@@ -16,38 +16,52 @@
*/
package org.apache.jackrabbit.oak.plugins.segment;
-import static com.google.common.collect.Lists.newArrayList;
import static com.google.common.collect.Maps.newHashMapWithExpectedSize;
-import static java.util.Collections.nCopies;
import java.lang.ref.WeakReference;
-import java.util.ArrayList;
import java.util.Collection;
import java.util.Map;
import org.apache.jackrabbit.oak.plugins.segment.compaction.CompactionStrategy;
/**
- * Hash table of weak references to segment identifiers.
+ * A collection of weak references to segment identifiers.
*/
public class SegmentIdTable {
/**
- * Hash table of weak references to segment identifiers that are currently
- * being accessed. The size of the table is always a power of two, which
- * optimizes the {@link #expand()} operation. The table is indexed by the
- * random identifier bits, which guarantees uniform distribution of entries.
- * Each table entry is either {@code null} (when there are no matching
- * identifiers) or a list of weak references to the matching identifiers.
+ * The array of weak references to segment identifiers that are currently
+ * being accessed. This represents a hash table that uses open addressing
+ * with linear probing. It is not a hash map, to speed up read access.
+ *
+ * The size of the table is always a power of two, so that we can use
+ * bitwise "and" instead of modulo.
*
- * Actually, this is a array. It's not a hash map, to conserve memory (maps
- * need much more memory).
+ * The table is indexed by the random identifier bits, which guarantees
+ * uniform distribution of entries.
*
- * The list is not sorted (we could; lookup would be faster, but adding and
- * removing entries would be slower).
+ * Open addressing with linear probing is used. Each table entry is either
+ * null (when there are no matching identifiers), a weak references to the
+ * matching identifier, or a weak reference to another identifier.
+ * There are no tombstone entries as there is no explicit remove operation,
+ * but a referent can become null if the entry is garbage collected.
+ *
+ * The array is not sorted (we could; lookup might be faster, but adding
+ * entries would be slower).
+ */
+ @SuppressWarnings("unchecked")
+ private WeakReference[] references =
+ (WeakReference[]) new WeakReference[1024];
+
+ /**
+ * The number of used entries in this table.
+ */
+ private int entryCount;
+
+ /**
+ * The resize count (for diagnostics and testing).
*/
- private final ArrayList> references =
- newArrayList(nCopies(1024, (WeakReference) null));
+ private int resizeCount;
private final SegmentTracker tracker;
@@ -65,9 +79,11 @@
synchronized SegmentId getSegmentId(long msb, long lsb) {
int first = getIndex(lsb);
int index = first;
- boolean shouldRefresh = false;
+ boolean resizeNeeded = false;
+
+ int size = references.length;
- WeakReference reference = references.get(index);
+ WeakReference reference = references[index];
while (reference != null) {
SegmentId id = reference.get();
if (id != null
@@ -75,64 +91,110 @@
&& id.getLeastSignificantBits() == lsb) {
return id;
}
- shouldRefresh = shouldRefresh || id == null;
- index = (index + 1) % references.size();
- reference = references.get(index);
+ if (id == null) {
+ // we have a garbage collected entry: resize
+ resizeNeeded = true;
+ }
+ // open addressing / linear probing
+ index = (index + 1) & (size - 1);
+ reference = references[index];
+ }
+
+ if (entryCount > size * 0.75) {
+ // more than 75% full
+ resizeNeeded = true;
}
SegmentId id = new SegmentId(tracker, msb, lsb);
- references.set(index, new WeakReference(id));
- if (shouldRefresh) {
- refresh();
+ entryCount++;
+ references[index] = new WeakReference(id);
+ if (resizeNeeded) {
+ refreshAndResize();
}
return id;
}
+
+ /**
+ * Get the internal map size (used for testing and diagnostics).
+ *
+ * @return the map size
+ */
+ int getMapSize() {
+ return references.length;
+ }
+
+ /**
+ * Get the entry count (used for testing and diagnostics).
+ *
+ * @return the entry count
+ */
+ int getEntryCount() {
+ return entryCount;
+ }
+
+ /**
+ * Get the number of resize operations (used for testing and diagnostics).
+ *
+ * @return the resize count
+ */
+ int getResizeCount() {
+ return resizeCount;
+ }
/**
* Returns all segment identifiers that are currently referenced in memory.
- *
- * @return referenced segment identifiers
*/
void collectReferencedIds(Collection ids) {
- ids.addAll(refresh());
+ ids.addAll(refreshAndResize());
}
- private synchronized Collection refresh() {
- int size = references.size();
+ @SuppressWarnings("unchecked")
+ private synchronized Collection refreshAndResize() {
+ resizeCount++;
+ int size = references.length;
Map> ids =
newHashMapWithExpectedSize(size);
+ // if there is a collision (that is, if there is an entry that is at the
+ // wrong location), then we recreate the map
boolean hashCollisions = false;
boolean emptyReferences = false;
for (int i = 0; i < size; i++) {
- WeakReference reference = references.get(i);
+ WeakReference reference = references[i];
if (reference != null) {
SegmentId id = reference.get();
if (id != null) {
ids.put(id, reference);
- hashCollisions = hashCollisions || (i != getIndex(id));
+ if (i != getIndex(id)) {
+ hashCollisions = true;
+ }
} else {
- references.set(i, null);
+ references[i] = null;
emptyReferences = true;
}
}
}
-
- while (2 * ids.size() > size) {
+ entryCount = ids.size();
+ size = 2;
+ while (entryCount > size * 0.75) {
+ // more than 75% full
size *= 2;
}
- if ((hashCollisions && emptyReferences) || size != references.size()) {
- references.clear();
- references.addAll(nCopies(size, (WeakReference) null));
+ // we need to re-build the table if the new size is different,
+ // but also if we removed some of the entries (because an entry was
+ // garbage collected) and there is at least one entry at the "wrong"
+ // location (due to open addressing)
+ if (size != references.length || (hashCollisions && emptyReferences)) {
+ references = (WeakReference[]) new WeakReference[size];
for (Map.Entry> entry
: ids.entrySet()) {
int index = getIndex(entry.getKey());
- while (references.get(index) != null) {
- index = (index + 1) % size;
+ while (references[index] != null) {
+ index = (index + 1) & (size - 1);
}
- references.set(index, entry.getValue());
+ references[index] = entry.getValue();
}
}
@@ -144,27 +206,27 @@
}
private int getIndex(long lsb) {
- return ((int) lsb) & (references.size() - 1);
+ return ((int) lsb) & (references.length - 1);
}
synchronized void clearSegmentIdTables(CompactionStrategy strategy) {
- int size = references.size();
+ int size = references.length;
boolean dirty = false;
for (int i = 0; i < size; i++) {
- WeakReference reference = references.get(i);
+ WeakReference reference = references[i];
if (reference != null) {
SegmentId id = reference.get();
if (id != null) {
if (strategy.canRemove(id)) {
reference.clear();
- references.set(i, null);
+ references[i] = null;
dirty = true;
}
}
}
}
if (dirty) {
- refresh();
+ refreshAndResize();
}
}
}
Index: src/test/java/org/apache/jackrabbit/oak/plugins/segment/SegmentIdTableTestIT.java
===================================================================
--- src/test/java/org/apache/jackrabbit/oak/plugins/segment/SegmentIdTableTestIT.java (revision 0)
+++ src/test/java/org/apache/jackrabbit/oak/plugins/segment/SegmentIdTableTestIT.java (working copy)
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.segment;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+import java.util.concurrent.Callable;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.TimeUnit;
+
+import junit.framework.Assert;
+
+import org.apache.jackrabbit.oak.plugins.segment.memory.MemoryStore;
+import org.junit.Test;
+
+public class SegmentIdTableTestIT {
+
+ /**
+ * OAK-2752
+ */
+ @Test
+ public void endlessSearchLoop() {
+ SegmentTracker tracker = new MemoryStore().getTracker();
+ final SegmentIdTable tbl = new SegmentIdTable(tracker);
+
+ List refs = new ArrayList();
+ for (int i = 0; i < 1024; i++) {
+ refs.add(tbl.getSegmentId(i, i % 64));
+ }
+
+ Callable c = new Callable() {
+
+ @Override
+ public SegmentId call() throws Exception {
+ // (2,1) doesn't exist
+ return tbl.getSegmentId(2, 1);
+ }
+ };
+ Future f = Executors.newSingleThreadExecutor().submit(c);
+ SegmentId s = null;
+ try {
+ s = f.get(5, TimeUnit.SECONDS);
+ } catch (Exception e) {
+ Assert.fail(e.getMessage());
+ }
+ Assert.assertNotNull(s);
+ Assert.assertEquals(2, s.getMostSignificantBits());
+ Assert.assertEquals(1, s.getLeastSignificantBits());
+ }
+
+ @Test
+ public void randomizedTest() {
+ SegmentTracker tracker = new MemoryStore().getTracker();
+ final SegmentIdTable tbl = new SegmentIdTable(tracker);
+
+ List refs = new ArrayList();
+ Random r = new Random(1);
+ for (int i = 0; i < 16 * 1024; i++) {
+ refs.add(tbl.getSegmentId(r.nextLong(), r.nextLong()));
+ }
+ Assert.assertEquals(16 * 1024, tbl.getEntryCount());
+ Assert.assertEquals(16 * 2048, tbl.getMapSize());
+ Assert.assertEquals(5, tbl.getResizeCount());
+
+ r = new Random(1);
+ for (int i = 0; i < 16 * 1024; i++) {
+ refs.add(tbl.getSegmentId(r.nextLong(), r.nextLong()));
+ Assert.assertEquals(16 * 1024, tbl.getEntryCount());
+ Assert.assertEquals(16 * 2048, tbl.getMapSize());
+ Assert.assertEquals(5, tbl.getResizeCount());
+ }
+
+ refs.clear();
+
+ }
+}
\ No newline at end of file