### Eclipse Workspace Patch 1.0 #P oak-core Index: src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentIdTable.java =================================================================== --- src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentIdTable.java (revision 1672555) +++ src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentIdTable.java (working copy) @@ -23,6 +23,7 @@ import java.lang.ref.WeakReference; import java.util.ArrayList; import java.util.Collection; +import java.util.List; import java.util.Map; import org.apache.jackrabbit.oak.plugins.segment.compaction.CompactionStrategy; @@ -33,23 +34,39 @@ public class SegmentIdTable { /** - * Hash table of weak references to segment identifiers that are currently - * being accessed. The size of the table is always a power of two, which - * optimizes the {@link #expand()} operation. The table is indexed by the - * random identifier bits, which guarantees uniform distribution of entries. - * Each table entry is either {@code null} (when there are no matching - * identifiers) or a list of weak references to the matching identifiers. + * The list of weak references to segment identifiers that are currently + * being accessed. This represents a hash table that uses open addressing + * with linear probing. It is not a hash map, to speed up read access. + *

+ * The size of the table is always a power of two, so that we can use + * bitwise "and" instead of modulo. + *

+ * The table is indexed by the random identifier bits, which guarantees + * uniform distribution of entries. *

- * Actually, this is a array. It's not a hash map, to conserve memory (maps - * need much more memory). + * Open addressing with linear probing is used. Each table entry is either + * null (when there are no matching identifiers), a weak references to the + * matching identifier, or a weak reference to another identifier. + * There are no tombstone entries as there is no explicit remove operation, + * but a referent can become null if the entry is garbage collected. *

- * The list is not sorted (we could; lookup would be faster, but adding and - * removing entries would be slower). + * The array is not sorted (we could; lookup might be faster, but adding + * entries would be slower). */ private final ArrayList> references = newArrayList(nCopies(1024, (WeakReference) null)); private final SegmentTracker tracker; + + /** + * The refresh count (for diagnostics and testing). + */ + private int rebuildCount; + + /** + * The number of used entries (WeakReferences) in this table. + */ + private int entryCount; SegmentIdTable(SegmentTracker tracker) { this.tracker = tracker; @@ -75,13 +92,20 @@ && id.getLeastSignificantBits() == lsb) { return id; } + // shouldRefresh if we have a garbage collected entry shouldRefresh = shouldRefresh || id == null; + // open addressing / linear probing index = (index + 1) % references.size(); reference = references.get(index); } SegmentId id = new SegmentId(tracker, msb, lsb); references.set(index, new WeakReference(id)); + entryCount++; + if (entryCount > references.size() * 0.75) { + // more than 75% full + shouldRefresh = true; + } if (shouldRefresh) { refresh(); } @@ -91,7 +115,7 @@ /** * Returns all segment identifiers that are currently referenced in memory. * - * @return referenced segment identifiers + * @param ids referenced segment identifiers */ void collectReferencedIds(Collection ids) { ids.addAll(refresh()); @@ -113,16 +137,27 @@ hashCollisions = hashCollisions || (i != getIndex(id)); } else { references.set(i, null); + entryCount--; emptyReferences = true; } } } + + // even thought the entry count should already be like that, + // for security we set it again, because having a wrong entry count + // would be very problematic + entryCount = ids.size(); while (2 * ids.size() > size) { size *= 2; } + // we need to re-build the table if the new size is different, + // but also if we removed some of the entries (because an entry was + // garbage collected) and there is at least one entry at the "wrong" + // location (due to open addressing) if ((hashCollisions && emptyReferences) || size != references.size()) { + rebuildCount++; references.clear(); references.addAll(nCopies(size, (WeakReference) null)); @@ -157,7 +192,6 @@ if (id != null) { if (strategy.canRemove(id)) { reference.clear(); - references.set(i, null); dirty = true; } } @@ -167,4 +201,50 @@ refresh(); } } + + /** + * Get the number of map rebuild operations (used for testing and diagnostics). + * + * @return the rebuild count + */ + int getMapRebuildCount() { + return rebuildCount; + } + + /** + * Get the entry count (used for testing and diagnostics). + * + * @return the entry count + */ + int getEntryCount() { + return entryCount; + } + + /** + * Get the size of the internal map (used for testing and diagnostics). + * + * @return the map size + */ + int getMapSize() { + return references.size(); + } + + /** + * Get the raw list of segment ids (used for testing). + * + * @return the raw list + */ + List getRawSegmentIdList() { + ArrayList list = new ArrayList(); + for (WeakReference ref : references) { + if (ref != null) { + SegmentId id = ref.get(); + if (id != null) { + list.add(id); + } + } + } + return list; + } + } Index: src/test/java/org/apache/jackrabbit/oak/plugins/segment/SegmentIdTableTest.java =================================================================== --- src/test/java/org/apache/jackrabbit/oak/plugins/segment/SegmentIdTableTest.java (revision 0) +++ src/test/java/org/apache/jackrabbit/oak/plugins/segment/SegmentIdTableTest.java (working copy) @@ -0,0 +1,214 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.jackrabbit.oak.plugins.segment; + +import static org.junit.Assert.fail; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Random; +import java.util.concurrent.Callable; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; + +import javax.annotation.Nonnull; + +import junit.framework.Assert; + +import org.apache.jackrabbit.oak.plugins.segment.compaction.CompactionStrategy; +import org.apache.jackrabbit.oak.plugins.segment.compaction.CompactionStrategy.CleanupType; +import org.apache.jackrabbit.oak.plugins.segment.memory.MemoryStore; +import org.junit.Test; + +public class SegmentIdTableTest { + + /** + * OAK-2752 + */ + @Test + public void endlessSearchLoop() { + SegmentTracker tracker = new MemoryStore().getTracker(); + final SegmentIdTable tbl = new SegmentIdTable(tracker); + + List refs = new ArrayList(); + for (int i = 0; i < 1024; i++) { + refs.add(tbl.getSegmentId(i, i % 64)); + } + + Callable c = new Callable() { + + @Override + public SegmentId call() throws Exception { + // (2,1) doesn't exist + return tbl.getSegmentId(2, 1); + } + }; + Future f = Executors.newSingleThreadExecutor().submit(c); + SegmentId s = null; + try { + s = f.get(5, TimeUnit.SECONDS); + } catch (Exception e) { + Assert.fail(e.getMessage()); + } + Assert.assertNotNull(s); + Assert.assertEquals(2, s.getMostSignificantBits()); + Assert.assertEquals(1, s.getLeastSignificantBits()); + } + + @Test + public void randomized() { + SegmentTracker tracker = new MemoryStore().getTracker(); + final SegmentIdTable tbl = new SegmentIdTable(tracker); + + List refs = new ArrayList(); + Random r = new Random(1); + for (int i = 0; i < 16 * 1024; i++) { + refs.add(tbl.getSegmentId(r.nextLong(), r.nextLong())); + } + Assert.assertEquals(16 * 1024, tbl.getEntryCount()); + Assert.assertEquals(16 * 2048, tbl.getMapSize()); + Assert.assertEquals(5, tbl.getMapRebuildCount()); + + r = new Random(1); + for (int i = 0; i < 16 * 1024; i++) { + refs.add(tbl.getSegmentId(r.nextLong(), r.nextLong())); + Assert.assertEquals(16 * 1024, tbl.getEntryCount()); + Assert.assertEquals(16 * 2048, tbl.getMapSize()); + Assert.assertEquals(5, tbl.getMapRebuildCount()); + } + } + + @Test + public void clearTable() { + SegmentTracker tracker = new MemoryStore().getTracker(); + final SegmentIdTable tbl = new SegmentIdTable(tracker); + + List refs = new ArrayList(); + int originalCount = 8; + for (int i = 0; i < originalCount; i++) { + refs.add(tbl.getSegmentId(i, i % 2)); + } + Assert.assertEquals(originalCount, tbl.getEntryCount()); + Assert.assertEquals(0, tbl.getMapRebuildCount()); + + tbl.clearSegmentIdTables(new CompactionStrategy(false, false, + CleanupType.CLEAN_NONE, originalCount, (byte) 0) { + + @Override + public boolean compacted(@Nonnull Callable setHead) + throws Exception { + return true; + } + + @Override + public boolean canRemove(SegmentId id) { + return id.getMostSignificantBits() < 4; + } + + }); + + Assert.assertEquals(4, tbl.getEntryCount()); + + for (SegmentId id : refs) { + if (id.getMostSignificantBits() >= 4) { + SegmentId id2 = tbl.getSegmentId( + id.getMostSignificantBits(), + id.getLeastSignificantBits()); + List list = tbl.getRawSegmentIdList(); + if (list.size() != new HashSet(list).size()) { + Collections.sort(list); + fail("duplicate entry " + list.toString()); + } + Assert.assertTrue(id == id2); + } + } + } + + @Test + public void justHashCollisions() { + SegmentTracker tracker = new MemoryStore().getTracker(); + final SegmentIdTable tbl = new SegmentIdTable(tracker); + + List refs = new ArrayList(); + int originalCount = 1024; + for (int i = 0; i < originalCount; i++) { + // modulo 128 to ensure we have conflicts + refs.add(tbl.getSegmentId(i, i % 128)); + } + Assert.assertEquals(originalCount, tbl.getEntryCount()); + Assert.assertEquals(1, tbl.getMapRebuildCount()); + + List refs2 = new ArrayList(); + tbl.collectReferencedIds(refs2); + Assert.assertEquals(refs.size(), refs2.size()); + + Assert.assertEquals(originalCount, tbl.getEntryCount()); + // we don't expect that there was a refresh, + // because there were just hash collisions + Assert.assertEquals(1, tbl.getMapRebuildCount()); + } + + @Test + public void gc() { + SegmentTracker tracker = new MemoryStore().getTracker(); + final SegmentIdTable tbl = new SegmentIdTable(tracker); + + List refs = new ArrayList(); + int originalCount = 1024; + for (int i = 0; i < originalCount; i++) { + // modulo 128 to ensure we have conflicts + refs.add(tbl.getSegmentId(i, i % 128)); + } + Assert.assertEquals(originalCount, tbl.getEntryCount()); + Assert.assertEquals(1, tbl.getMapRebuildCount()); + + for (int i = 0; i < refs.size() / 2; i++) { + // we need to remove the first entries, + // because if we remove the last entries, then + // getSegmentId would not detect that entries were freed up + refs.remove(0); + } + for (int gcCalls = 0;; gcCalls++) { + // needed here, so some entries can be garbage collected + System.gc(); + + for (SegmentId id : refs) { + SegmentId id2 = tbl.getSegmentId(id.getMostSignificantBits(), id.getLeastSignificantBits()); + Assert.assertTrue(id2 == id); + } + // because we found each entry, we expect the refresh count is the same + Assert.assertEquals(1, tbl.getMapRebuildCount()); + + // even thought this does not increase the entry count a lot, + // it is supposed to detect that entries were removed, + // and force a refresh, which would get rid of the unreferenced ids + for (int i = 0; i < 10; i++) { + tbl.getSegmentId(i, i); + } + + if (tbl.getEntryCount() < originalCount) { + break; + } else if (gcCalls > 10) { + fail("No entries were garbage collected after 10 times System.gc()"); + } + } + Assert.assertEquals(2, tbl.getMapRebuildCount()); + } +} \ No newline at end of file