Index: oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/Segment.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/Segment.java	(revision 5f33ea40a7c2643c1612cde22a64060810e861ea)
+++ oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/Segment.java	(revision )
@@ -36,7 +36,10 @@
 import java.util.List;
 import java.util.concurrent.ConcurrentMap;
 
+import javax.annotation.Nullable;
+
 import com.google.common.base.Charsets;
+import com.google.common.base.Function;
 import org.apache.jackrabbit.oak.api.PropertyState;
 import org.apache.jackrabbit.oak.api.Type;
 import org.apache.jackrabbit.oak.plugins.blob.ReferenceCollector;
@@ -121,18 +124,12 @@
     private final SegmentId[] refids;
 
     /**
-     * String records read from segment. Used to avoid duplicate
-     * copies and repeated parsing of the same strings.
-     */
-    private final ConcurrentMap<Integer, String> strings = newConcurrentMap();
-
-    /**
      * Template records read from segment. Used to avoid duplicate
      * copies and repeated parsing of the same templates.
      */
     private final ConcurrentMap<Integer, Template> templates = newConcurrentMap();
 
-    private volatile long accessed = 0;
+    private volatile long accessed;
 
     /**
      * Decode a 4 byte aligned segment offset.
@@ -367,44 +364,43 @@
     }
 
     private String readString(int offset) {
-        String string = strings.get(offset);
-        if (string == null) {
-            string = loadString(offset);
-            strings.putIfAbsent(offset, string); // only keep the first copy
-        }
-        return string;
-    }
+        return tracker.getStringCache().getString(this, offset, loadString);
+    }
 
-    private String loadString(int offset) {
-        int pos = pos(offset, 1);
-        long length = internalReadLength(pos);
-        if (length < SMALL_LIMIT) {
-            byte[] bytes = new byte[(int) length];
-            ByteBuffer buffer = data.duplicate();
-            buffer.position(pos + 1);
-            buffer.get(bytes);
-            return new String(bytes, Charsets.UTF_8);
-        } else if (length < MEDIUM_LIMIT) {
-            byte[] bytes = new byte[(int) length];
-            ByteBuffer buffer = data.duplicate();
-            buffer.position(pos + 2);
-            buffer.get(bytes);
-            return new String(bytes, Charsets.UTF_8);
-        } else if (length < Integer.MAX_VALUE) {
-            int size = (int) ((length + BLOCK_SIZE - 1) / BLOCK_SIZE);
-            ListRecord list =
-                    new ListRecord(internalReadRecordId(pos + 8), size);
-            SegmentStream stream = new SegmentStream(
-                    new RecordId(id, offset), list, length);
-            try {
-                return stream.getString();
-            } finally {
-                stream.close();
-            }
-        } else {
-            throw new IllegalStateException("String is too long: " + length);
-        }
-    }
+    private final Function<Integer, String> loadString = new Function<Integer, String>() {
+        @Nullable
+        @Override
+        public String apply(Integer offset) {
+            int pos = pos(offset, 1);
+            long length = internalReadLength(pos);
+            if (length < SMALL_LIMIT) {
+                byte[] bytes = new byte[(int) length];
+                ByteBuffer buffer = data.duplicate();
+                buffer.position(pos + 1);
+                buffer.get(bytes);
+                return new String(bytes, Charsets.UTF_8);
+            } else if (length < MEDIUM_LIMIT) {
+                byte[] bytes = new byte[(int) length];
+                ByteBuffer buffer = data.duplicate();
+                buffer.position(pos + 2);
+                buffer.get(bytes);
+                return new String(bytes, Charsets.UTF_8);
+            } else if (length < Integer.MAX_VALUE) {
+                int size = (int) ((length + BLOCK_SIZE - 1) / BLOCK_SIZE);
+                ListRecord list =
+                        new ListRecord(internalReadRecordId(pos + 8), size);
+                SegmentStream stream = new SegmentStream(
+                        new RecordId(id, offset), list, length);
+                try {
+                    return stream.getString();
+                } finally {
+                    stream.close();
+                }
+            } else {
+                throw new IllegalStateException("String is too long: " + length);
+            }
+        }
+    };
 
     MapRecord readMap(RecordId id) {
         return new MapRecord(id);
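Note on the Segment.java change above: the per-segment strings map is removed, and readString() now goes through the tracker-wide StringCache, handing over the segment's decoding logic as a Guava Function<Integer, String> so the cache can call back into the segment only on a miss. The standalone sketch below (illustration only, not part of the patch; class, map and loader are stand-ins) shows the same load-on-miss callback pattern:

    import java.util.concurrent.ConcurrentHashMap;
    import java.util.concurrent.ConcurrentMap;

    import com.google.common.base.Function;

    public class LoadOnMissExample {

        // Shared cache keyed by offset; stands in for the tracker's StringCache.
        private static final ConcurrentMap<Integer, String> CACHE =
                new ConcurrentHashMap<Integer, String>();

        // The loader runs only when the offset is not cached yet, which is
        // how Segment passes its loadString function to the shared cache.
        static String getString(int offset, Function<Integer, String> loader) {
            String s = CACHE.get(offset);
            if (s == null) {
                s = loader.apply(offset);
                CACHE.putIfAbsent(offset, s); // keep the first copy only
            }
            return s;
        }

        public static void main(String[] args) {
            Function<Integer, String> loader = new Function<Integer, String>() {
                @Override
                public String apply(Integer offset) {
                    // A real loader would decode the string record at this offset.
                    return "record@" + offset;
                }
            };
            System.out.println(getString(42, loader)); // invokes the loader
            System.out.println(getString(42, loader)); // served from the cache
        }
    }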
Index: oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentTracker.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentTracker.java	(revision 5f33ea40a7c2643c1612cde22a64060810e861ea)
+++ oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentTracker.java	(revision )
@@ -91,7 +91,9 @@
     private final LinkedList<Segment> segments = newLinkedList();
 
     private long currentSize;
-
+
+    private final StringCache stringCache;
+
     public SegmentTracker(SegmentStore store, int cacheSizeMB,
             SegmentVersion version) {
         for (int i = 0; i < tables.length; i++) {
@@ -103,6 +105,7 @@
         this.cacheSize = cacheSizeMB * MB;
         this.compactionMap = new AtomicReference<CompactionMap>(
                 CompactionMap.EMPTY);
+        stringCache = new StringCache((int) Math.min(Integer.MAX_VALUE, cacheSize));
     }
 
     public SegmentTracker(SegmentStore store, SegmentVersion version) {
@@ -126,7 +129,12 @@
      */
     public synchronized void clearCache() {
         segments.clear();
+        stringCache.clear();
         currentSize = 0;
+    }
+
+    public StringCache getStringCache() {
+        return stringCache;
     }
 
     Segment getSegment(SegmentId id) {
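Note on the SegmentTracker.java wiring above: the new StringCache is handed the tracker's whole byte budget (cacheSizeMB * MB), capped to Integer.MAX_VALUE because the StringCache constructor takes an int. The cap only matters for very large configurations; a back-of-the-envelope sketch (the 256 MB figure is illustrative, not taken from the patch):

    public class StringCacheSizing {
        public static void main(String[] args) {
            long MB = 1024 * 1024;

            // A 256 MB cache easily fits in an int ...
            long cacheSize = 256 * MB;
            System.out.println((int) Math.min(Integer.MAX_VALUE, cacheSize)); // 268435456

            // ... but from 2048 MB upwards the byte count exceeds Integer.MAX_VALUE,
            // so Math.min clamps it instead of letting the cast overflow.
            long big = 4096 * MB;
            System.out.println((int) Math.min(Integer.MAX_VALUE, big)); // 2147483647
        }
    }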
Index: oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/StringCache.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/StringCache.java	(revision )
+++ oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/StringCache.java	(revision )
@@ -0,0 +1,190 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.segment;
+
+import com.google.common.base.Function;
+import org.apache.jackrabbit.oak.cache.CacheLIRS;
+
+/**
+ * A string cache. It has two components: a fast cache for small strings, based
+ * on an array, and a slow cache that uses a LIRS cache.
+ */
+public class StringCache {
+
+    /**
+     * The slow cache.
+     */
+    private CacheLIRS<StringCacheEntry, String> cache;
+
+    /**
+     * Create a new string cache.
+     *
+     * @param maxSize the maximum memory in bytes.
+     */
+    StringCache(int maxSize) {
+        cache = new CacheLIRS.Builder().maximumSize(maxSize).averageWeight(100)
+                .build();
+    }
+
+    /**
+     * Get the string, loading it if necessary.
+     *
+     * @param segment the segment
+     * @param offset the offset
+     * @return the string (never null)
+     */
+    String getString(Segment segment, int offset, Function<Integer, String> loader) {
+        int hash = getEntryHash(segment, offset);
+        String s = FastCache.getString(hash, segment, offset);
+        if (s != null) {
+            return s;
+        }
+        StringCacheEntry key = new StringCacheEntry(hash, segment, offset, null);
+        s = cache.getIfPresent(key);
+        if (s == null) {
+            s = loader.apply(offset);
+            cache.put(key, s, getMemory(s));
+        }
+        if (FastCache.isSmall(s)) {
+            key.setString(s);
+            FastCache.addString(hash, key);
+        }
+        return s;
+    }
+
+    public void clear() {
+        cache.invalidateAll();
+    }
+
+    private static int getMemory(String s) {
+        return 100 + s.length() * 2;
+    }
+
+    private static int getEntryHash(Segment segment, int offset) {
+        int hash = segment.getSegmentId().hashCode() + offset;
+        hash = ((hash >>> 16) ^ hash) * 0x45d9f3b;
+        return (hash >>> 16) ^ hash;
+    }
+
+    /**
+     * A fast cache based on an array.
+     */
+    static class FastCache {
+
+        /**
+         * The maximum number of characters in strings that are cached.
+         */
+        static final int MAX_STRING_SIZE = 128;
+
+        /**
+         * The number of entries in the cache. Must be a power of 2.
+         */
+        private static final int CACHE_SIZE = 16 * 1024;
+
+        /**
+         * The cache array.
+         */
+        private static final StringCacheEntry[] CACHE = new StringCacheEntry[CACHE_SIZE];
+
+        /**
+         * Get the string if it is stored.
+         *
+         * @param hash the hash
+         * @param segment the segment
+         * @param offset the offset
+         * @return the string, or null
+         */
+        static String getString(int hash, Segment segment, int offset) {
+            int index = hash & (CACHE_SIZE - 1);
+            StringCacheEntry e = CACHE[index];
+            if (e != null && e.matches(segment, offset)) {
+                return e.string;
+            }
+            return null;
+        }
+
+        /**
+         * Whether the entry is small, in which case it can be kept in the fast cache.
+         *
+         * @param s the string
+         * @return whether the entry is small
+         */
+        static boolean isSmall(String s) {
+            return s.length() <= MAX_STRING_SIZE;
+        }
+
+        static void addString(int hash, StringCacheEntry entry) {
+            int index = hash & (CACHE_SIZE - 1);
+            CACHE[index] = entry;
+        }
+
+    }
+
+    static class StringCacheEntry {
+        private final int hash;
+        private final long msb, lsb;
+        private final int offset;
+        private String string;
+
+        StringCacheEntry(int hash, Segment segment, int offset, String string) {
+            this.hash = hash;
+            SegmentId id = segment.getSegmentId();
+            this.msb = id.getMostSignificantBits();
+            this.lsb = id.getLeastSignificantBits();
+            this.offset = offset;
+            this.string = string;
+        }
+
+        void setString(String string) {
+            if (string == null) {
+                throw new NullPointerException();
+            }
+            this.string = string;
+        }
+
+        boolean matches(Segment segment, int offset) {
+            if (this.offset != offset) {
+                return false;
+            }
+            SegmentId id = segment.getSegmentId();
+            return id.getMostSignificantBits() == msb &&
+                    id.getLeastSignificantBits() == lsb;
+        }
+
+        @Override
+        public int hashCode() {
+            return hash;
+        }
+
+        @Override
+        public boolean equals(Object other) {
+            if (other == this) {
+                return true;
+            }
+            if (!(other instanceof StringCacheEntry)) {
+                return false;
+            }
+            StringCacheEntry o = (StringCacheEntry) other;
+            return o.hash == hash && o.msb == msb && o.lsb == lsb &&
+                    o.offset == offset;
+        }
+
+    }
+
+}
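Taken together, a lookup first probes the FastCache array slot for the hashed (segment, offset) key, then the LIRS cache, and only on a full miss invokes the loader and registers the result in both tiers (the fast tier only for strings of up to MAX_STRING_SIZE characters). The standalone sketch below mirrors that two-tier structure with plain JDK types; an LRU LinkedHashMap stands in for CacheLIRS, and all names and sizes are illustrative rather than Oak code:

    import java.util.LinkedHashMap;
    import java.util.Map;

    public class TwoTierCacheSketch {

        // Immutable entry so the fast tier is published with a single array write,
        // the same trick StringCacheEntry uses to avoid torn key/value pairs.
        static final class Entry {
            final int key;
            final String value;
            Entry(int key, String value) {
                this.key = key;
                this.value = value;
            }
        }

        static final int FAST_SIZE = 1024;            // power of two, like CACHE_SIZE
        static final Entry[] FAST = new Entry[FAST_SIZE];

        static final Map<Integer, String> SLOW =
                new LinkedHashMap<Integer, String>(16, 0.75f, true) {
                    @Override
                    protected boolean removeEldestEntry(Map.Entry<Integer, String> eldest) {
                        return size() > 10000;        // crude stand-in for a weight limit
                    }
                };

        static String get(int key) {
            int index = key & (FAST_SIZE - 1);
            Entry e = FAST[index];
            if (e != null && e.key == key) {
                return e.value;                       // fast, lock-free hit
            }
            String value;
            synchronized (SLOW) {
                value = SLOW.get(key);
                if (value == null) {
                    value = load(key);                // full miss: invoke the loader
                    SLOW.put(key, value);
                }
            }
            if (value.length() <= 128) {              // only small strings enter the fast tier
                FAST[index] = new Entry(key, value);
            }
            return value;
        }

        static String load(int key) {
            return "string-" + key;                   // placeholder for Segment's loadString
        }

        public static void main(String[] args) {
            System.out.println(get(42));              // loads
            System.out.println(get(42));              // answered by the fast tier
        }
    }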