Index: src/main/java/org/apache/jackrabbit/oak/plugins/segment/StringCache.java
===================================================================
--- src/main/java/org/apache/jackrabbit/oak/plugins/segment/StringCache.java (revision 0)
+++ src/main/java/org/apache/jackrabbit/oak/plugins/segment/StringCache.java (working copy)
@@ -0,0 +1,232 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.segment;
+
+import java.util.Arrays;
+
+import com.google.common.base.Function;
+import org.apache.jackrabbit.oak.cache.CacheLIRS;
+
+/**
+ * A string cache. It has two components: a fast cache for small strings, based
+ * on an array, and a slow cache that uses a LIRS cache.
+ */
+public class StringCache {
+
+    /**
+     * The fast (array based) cache.
+     */
+    private final FastCache fastCache = new FastCache();
+
+    /**
+     * The slower (LIRS) cache.
+     */
+    private final CacheLIRS<StringCacheEntry, String> cache;
+
+    /**
+     * Create a new string cache.
+     *
+     * @param maxSize the maximum memory in bytes.
+     */
+    StringCache(int maxSize) {
+        cache = new CacheLIRS.Builder().maximumSize(maxSize).averageWeight(100)
+                .build();
+    }
+
+    /**
+     * Get the string, loading it if necessary.
+     *
+     * @param msb the msb of the segment
+     * @param lsb the lsb of the segment
+     * @param offset the offset
+     * @param loader the string loader function
+     * @return the string (never null)
+     */
+    public String getString(long msb, long lsb, int offset, Function<Integer, String> loader) {
+        int hash = getEntryHash(msb, lsb, offset);
+        // try the array based cache first
+        String s = fastCache.getString(hash, msb, lsb, offset);
+        if (s != null) {
+            return s;
+        }
+        // the key has no string yet; it is only set below for small strings
+        StringCacheEntry key = new StringCacheEntry(hash, msb, lsb, offset, null);
+        s = cache.getIfPresent(key);
+        if (s == null) {
+            s = loader.apply(offset);
+            cache.put(key, s, getMemory(s));
+        }
+        if (FastCache.isSmall(s)) {
+            // small strings are also kept in the array based cache
+            key.setString(s);
+            fastCache.addString(hash, key);
+        }
+        return s;
+    }
+
+    /**
+     * Clear the cache.
+     */
+    public void clear() {
+        cache.invalidateAll();
+        fastCache.clear();
+    }
+
+    /**
+     * Get the weight of a string for the LIRS cache.
+     *
+     * @param s the string
+     * @return 100, plus 2 for each character
+     */
+    private static int getMemory(String s) {
+        return 100 + s.length() * 2;
+    }
+
+    /**
+     * Calculate the hash code of an entry.
+     *
+     * @param msb the msb of the segment
+     * @param lsb the lsb of the segment
+     * @param offset the offset
+     * @return the hash code
+     */
+    private static int getEntryHash(long msb, long lsb, int offset) {
+        int hash = (int) (msb ^ lsb) + offset;
+        hash = ((hash >>> 16) ^ hash) * 0x45d9f3b;
+        return (hash >>> 16) ^ hash;
+    }
+
+    /**
+     * A fast cache based on an array.
+     */
+    static class FastCache {
+
+        /**
+         * The maximum number of characters in string that are cached.
+         */
+        static final int MAX_STRING_SIZE = 128;
+
+        /**
+         * The number of entries in the cache. Must be a power of 2.
+         */
+        private static final int CACHE_SIZE = 16 * 1024;
+
+        /**
+         * The cache array.
+         */
+        private final StringCacheEntry[] cache = new StringCacheEntry[CACHE_SIZE];
+
+        /**
+         * Get the string if it is stored.
+         *
+         * @param hash the hash
+         * @param msb the msb of the segment
+         * @param lsb the lsb of the segment
+         * @param offset the offset
+         * @return the string, or null
+         */
+        String getString(int hash, long msb, long lsb, int offset) {
+            int index = hash & (CACHE_SIZE - 1);
+            StringCacheEntry e = cache[index];
+            if (e != null && e.matches(msb, lsb, offset)) {
+                return e.string;
+            }
+            return null;
+        }
+
+        /**
+         * Remove all entries.
+         */
+        void clear() {
+            Arrays.fill(cache, null);
+        }
+
+        /**
+         * Whether the entry is small, in which case it can be kept in the fast cache.
+         *
+         * @param s the string
+         * @return whether the entry is small
+         */
+        static boolean isSmall(String s) {
+            return s.length() <= MAX_STRING_SIZE;
+        }
+
+        /**
+         * Store an entry, replacing the entry currently in that slot (if any).
+         *
+         * @param hash the hash
+         * @param entry the entry
+         */
+        void addString(int hash, StringCacheEntry entry) {
+            int index = hash & (CACHE_SIZE - 1);
+            cache[index] = entry;
+        }
+
+    }
+
+    /**
+     * An entry: the key (hash, segment id, offset) and, once set, the string.
+     * The string is not part of hashCode and equals.
+     */
+    static class StringCacheEntry {
+        private final int hash;
+        private final long msb, lsb;
+        private final int offset;
+        private String string;
+
+        StringCacheEntry(int hash, long msb, long lsb, int offset, String string) {
+            this.hash = hash;
+            this.msb = msb;
+            this.lsb = lsb;
+            this.offset = offset;
+            this.string = string;
+        }
+
+        void setString(String string) {
+            if (string == null) {
+                throw new NullPointerException();
+            }
+            this.string = string;
+        }
+
+        boolean matches(long msb, long lsb, int offset) {
+            return this.offset == offset && this.msb == msb && this.lsb == lsb;
+        }
+
+        @Override
+        public int hashCode() {
+            return hash;
+        }
+
+        @Override
+        public boolean equals(Object other) {
+            if (other == this) {
+                return true;
+            }
+            if (!(other instanceof StringCacheEntry)) {
+                return false;
+            }
+            StringCacheEntry o = (StringCacheEntry) other;
+            return o.hash == hash && o.msb == msb && o.lsb == lsb &&
+                    o.offset == offset;
+        }
+
+    }
+
+}
Index: src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentTracker.java
===================================================================
--- src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentTracker.java
(revision 1690034)
+++ src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentTracker.java (working copy)
@@ -91,6 +91,8 @@
     private final LinkedList segments = newLinkedList();
 
     private long currentSize;
+
+    private final StringCache stringCache;
 
     public SegmentTracker(SegmentStore store, int cacheSizeMB,
             SegmentVersion version) {
@@ -103,6 +105,14 @@
         this.cacheSize = cacheSizeMB * MB;
         this.compactionMap = new AtomicReference(
                 CompactionMap.EMPTY);
+        StringCache c;
+        if (Boolean.getBoolean("oak.segment.disableStringCache")) {
+            c = null;
+        } else {
+            int stringCacheSize = (int) Math.min(Integer.MAX_VALUE, cacheSize);
+            c = new StringCache(stringCacheSize);
+        }
+        stringCache = c;
     }
 
     public SegmentTracker(SegmentStore store, SegmentVersion version) {
@@ -126,8 +136,20 @@
      */
     public synchronized void clearCache() {
         segments.clear();
+        if (stringCache != null) {
+            stringCache.clear();
+        }
         currentSize = 0;
     }
+
+    /**
+     * Get the string cache, if there is one.
+     *
+     * @return the string cache, or null if it is disabled
+     */
+    public StringCache getStringCache() {
+        return stringCache;
+    }
 
     Segment getSegment(SegmentId id) {
         try {
Index: src/test/java/org/apache/jackrabbit/oak/plugins/segment/StringCacheTest.java
===================================================================
--- src/test/java/org/apache/jackrabbit/oak/plugins/segment/StringCacheTest.java (revision 0)
+++ src/test/java/org/apache/jackrabbit/oak/plugins/segment/StringCacheTest.java (working copy)
@@ -0,0 +1,127 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.segment;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.util.ArrayList;
+import java.util.Random;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import javax.annotation.Nullable;
+
+import org.junit.Test;
+
+import com.google.common.base.Function;
+
+public class StringCacheTest {
+
+    @Test
+    public void empty() {
+        final AtomicInteger counter = new AtomicInteger();
+        Function<Integer, String> loader = new Function<Integer, String>() {
+            @Override @Nullable
+            public String apply(@Nullable Integer input) {
+                counter.incrementAndGet();
+                return "" + input;
+            }
+        };
+        StringCache c = new StringCache(0);
+        for (int repeat = 0; repeat < 10; repeat++) {
+            for (int i = 0; i < 1000; i++) {
+                assertEquals("" + i, c.getString(i, i, i, loader));
+            }
+        }
+        // the LIRS cache should be almost empty (low hit rate there)
+        assertTrue("" + counter, counter.get() > 1000);
+        // but the fast cache should improve the total hit rate
+        assertTrue("" + counter, counter.get() < 5000);
+    }
+
+    @Test
+    public void largeEntries() {
+        final AtomicInteger counter = new AtomicInteger();
+        final String large = new String(new char[1024]);
+        Function<Integer, String> loader = new Function<Integer, String>() {
+            @Override @Nullable
+            public String apply(@Nullable Integer input) {
+                counter.incrementAndGet();
+                return large + input;
+            }
+        };
+        StringCache c = new StringCache(1024);
+        for (int repeat = 0; repeat < 10; repeat++) {
+            for (int i = 0; i < 1000; i++) {
+                assertEquals(large + i, c.getString(i, i, i, loader));
+                assertEquals(large + 0, c.getString(0, 0, 0, loader));
+            }
+        }
+        // the LIRS cache should be almost empty (low hit rate there)
+        // and large strings are not kept in the fast cache, so hit rate should be bad
+        assertTrue("" + counter, counter.get() > 9000);
+        assertTrue("" + counter, counter.get() < 10000);
+    }
+
+    @Test
+    public void clear() {
+        final AtomicInteger counter = new AtomicInteger();
+        Function<Integer, String> uniqueLoader = new Function<Integer, String>() {
+            @Override @Nullable
+            public String apply(@Nullable Integer input) {
+                return "" + counter.incrementAndGet();
+            }
+        };
+        StringCache c = new StringCache(0);
+        // load a new entry
+        assertEquals("1", c.getString(0, 0, 0, uniqueLoader));
+        // but only once
+        assertEquals("1", c.getString(0, 0, 0, uniqueLoader));
+        c.clear();
+        // after clearing the cache, load a new entry
+        assertEquals("2", c.getString(0, 0, 0, uniqueLoader));
+        assertEquals("2", c.getString(0, 0, 0, uniqueLoader));
+    }
+
+    @Test
+    public void randomized() {
+        ArrayList<Function<Integer, String>> loaderList = new ArrayList<Function<Integer, String>>();
+        int segmentCount = 10;
+        for (int i = 0; i < segmentCount; i++) {
+            final int x = i;
+            Function<Integer, String> loader = new Function<Integer, String>() {
+                @Override @Nullable
+                public String apply(@Nullable Integer input) {
+                    return "loader #" + x + " offset " + input;
+                }
+            };
+            loaderList.add(loader);
+        }
+        StringCache c = new StringCache(10);
+        Random r = new Random(1);
+        for (int i = 0; i < 1000; i++) {
+            int segment = r.nextInt(segmentCount);
+            int offset = r.nextInt(10);
+            Function<Integer, String> loader = loaderList.get(segment);
+            String x = c.getString(segment, segment, offset, loader);
+            assertEquals(loader.apply(offset), x);
+        }
+    }
+
+}
Index: src/main/java/org/apache/jackrabbit/oak/plugins/segment/Segment.java
===================================================================
--- src/main/java/org/apache/jackrabbit/oak/plugins/segment/Segment.java (revision 1690034)
+++ src/main/java/org/apache/jackrabbit/oak/plugins/segment/Segment.java (working copy)
@@ -36,7 +36,10 @@
 import java.util.List;
 import java.util.concurrent.ConcurrentMap;
 
+import javax.annotation.Nullable;
+
 import com.google.common.base.Charsets;
+import com.google.common.base.Function;
 import org.apache.jackrabbit.oak.api.PropertyState;
 import org.apache.jackrabbit.oak.api.Type;
 import org.apache.jackrabbit.oak.plugins.blob.ReferenceCollector;
@@ -132,8 +135,18 @@
      */
     private final ConcurrentMap templates = newConcurrentMap();
 
-    private volatile long accessed = 0;
+    private volatile long accessed;
+
+    private final Function<Integer, String> loadString = new Function<Integer, String>() {
+        @Nullable
+        @Override
+        public String apply(Integer offset) {
+            return loadString(offset);
+        }
+    };
 
+    private final StringCache stringCache;
+
     /**
      * Decode a 4 byte aligned segment offset.
      * @param offset 4 byte aligned segment offset
@@ -159,6 +172,7 @@
     public Segment(SegmentTracker tracker, SegmentId id, ByteBuffer data, SegmentVersion version) {
         this.tracker = checkNotNull(tracker);
         this.id = checkNotNull(id);
+        this.stringCache = tracker.getStringCache();
         this.data = checkNotNull(data);
         if (id.isDataSegmentId()) {
             byte segmentVersion = data.get(3);
@@ -178,6 +192,7 @@
     Segment(SegmentTracker tracker, byte[] buffer) {
         this.tracker = checkNotNull(tracker);
         this.id = tracker.newDataSegmentId();
+        this.stringCache = tracker.getStringCache();
         this.data = ByteBuffer.wrap(checkNotNull(buffer));
         this.refids = new SegmentId[SEGMENT_REFERENCE_LIMIT + 1];
         this.refids[0] = id;
@@ -367,6 +382,11 @@
     }
 
     private String readString(int offset) {
+        if (stringCache != null) {
+            long msb = id.getMostSignificantBits();
+            long lsb = id.getLeastSignificantBits();
+            return stringCache.getString(msb, lsb, offset, loadString);
+        }
         String string = strings.get(offset);
         if (string == null) {
             string = loadString(offset);