Index: lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java =================================================================== --- lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java (revision 1175586) +++ lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java (revision ) @@ -26,10 +26,7 @@ import java.util.Map; import java.util.WeakHashMap; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermDocs; -import org.apache.lucene.index.TermEnum; +import org.apache.lucene.index.*; import org.apache.lucene.util.Bits; import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.StringHelper; @@ -43,706 +40,114 @@ * * @since lucene 1.4 */ +@Deprecated class FieldCacheImpl implements FieldCache { - private Map,Cache> caches; FieldCacheImpl() { - init(); } - private synchronized void init() { - caches = new HashMap,Cache>(9); - caches.put(Byte.TYPE, new ByteCache(this)); - caches.put(Short.TYPE, new ShortCache(this)); - caches.put(Integer.TYPE, new IntCache(this)); - caches.put(Float.TYPE, new FloatCache(this)); - caches.put(Long.TYPE, new LongCache(this)); - caches.put(Double.TYPE, new DoubleCache(this)); - caches.put(String.class, new StringCache(this)); - caches.put(StringIndex.class, new StringIndexCache(this)); - caches.put(DocsWithFieldCache.class, new DocsWithFieldCache(this)); - } public synchronized void purgeAllCaches() { - init(); + SlowMultiReaderWrapper.getNonAtomicFieldCache().purgeAllCaches(); } public synchronized void purge(IndexReader r) { - for(Cache c : caches.values()) { - c.purge(r); + new SlowMultiReaderWrapper(r).getFieldCache().purgeCache(); - } + } - } - + public synchronized CacheEntry[] getCacheEntries() { - List result = new ArrayList(17); - for(final Map.Entry,Cache> cacheEntry: caches.entrySet()) { - final Cache cache = cacheEntry.getValue(); - final Class cacheType = cacheEntry.getKey(); - synchronized(cache.readerCache) { - for (final Map.Entry> readerCacheEntry : cache.readerCache.entrySet()) { - final Object readerKey = readerCacheEntry.getKey(); - if (readerKey == null) continue; - final Map innerCache = readerCacheEntry.getValue(); - for (final Map.Entry mapEntry : innerCache.entrySet()) { - Entry entry = mapEntry.getKey(); - result.add(new CacheEntryImpl(readerKey, entry.field, - cacheType, entry.custom, - mapEntry.getValue())); + return SlowMultiReaderWrapper.getCacheEntries(); - } + } - } - } - } - return result.toArray(new CacheEntry[result.size()]); - } - + - private static final class CacheEntryImpl extends CacheEntry { - private final Object readerKey; - private final String fieldName; - private final Class cacheType; - private final Object custom; - private final Object value; - CacheEntryImpl(Object readerKey, String fieldName, - Class cacheType, - Object custom, - Object value) { - this.readerKey = readerKey; - this.fieldName = fieldName; - this.cacheType = cacheType; - this.custom = custom; - this.value = value; - - // :HACK: for testing. 
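A minimal usage sketch (not part of this patch) of the two call paths after this change; the Directory setup and the "price" field are hypothetical. The deprecated top-level cache now wraps the reader in a SlowMultiReaderWrapper on every call, while the per-segment path caches directly on each sub-reader:

  IndexReader reader = IndexReader.open(directory);

  // deprecated top-level path; delegates through SlowMultiReaderWrapper
  float[] topLevel = FieldCache.DEFAULT.getFloats(reader, "price");

  // new per-segment path
  List<IndexReader> subReaders = new ArrayList<IndexReader>();
  ReaderUtil.gatherSubReaders(subReaders, reader);
  for (IndexReader sub : subReaders) {
    float[] perSegment = sub.getFieldCache().getFloats("price");
  }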
-// if (null != locale || SortField.CUSTOM != sortFieldType) { -// throw new RuntimeException("Locale/sortFieldType: " + this); -// } - - } - @Override - public Object getReaderKey() { return readerKey; } - @Override - public String getFieldName() { return fieldName; } - @Override - public Class getCacheType() { return cacheType; } - @Override - public Object getCustom() { return custom; } - @Override - public Object getValue() { return value; } - } - - /** - * Hack: When thrown from a Parser (NUMERIC_UTILS_* ones), this stops - * processing terms and returns the current FieldCache - * array. - */ - static final class StopFillCacheException extends RuntimeException { - } - - final static IndexReader.ReaderFinishedListener purgeReader = new IndexReader.ReaderFinishedListener() { - // @Override -- not until Java 1.6 - public void finished(IndexReader reader) { - FieldCache.DEFAULT.purge(reader); - } - }; - - /** Expert: Internal cache. */ - abstract static class Cache { - Cache() { - this.wrapper = null; - } - - Cache(FieldCache wrapper) { - this.wrapper = wrapper; - } - - final FieldCache wrapper; - - final Map> readerCache = new WeakHashMap>(); - - protected abstract Object createValue(IndexReader reader, Entry key) - throws IOException; - - /** Remove this reader from the cache, if present. */ - public void purge(IndexReader r) { - Object readerKey = r.getCoreCacheKey(); - synchronized(readerCache) { - readerCache.remove(readerKey); - } - } - - public Object get(IndexReader reader, Entry key) throws IOException { - Map innerCache; - Object value; - final Object readerKey = reader.getCoreCacheKey(); - synchronized (readerCache) { - innerCache = readerCache.get(readerKey); - if (innerCache == null) { - // First time this reader is using FieldCache - innerCache = new HashMap(); - readerCache.put(readerKey, innerCache); - reader.addReaderFinishedListener(purgeReader); - value = null; - } else { - value = innerCache.get(key); - } - if (value == null) { - value = new CreationPlaceholder(); - innerCache.put(key, value); - } - } - if (value instanceof CreationPlaceholder) { - synchronized (value) { - CreationPlaceholder progress = (CreationPlaceholder) value; - if (progress.value == null) { - progress.value = createValue(reader, key); - synchronized (readerCache) { - innerCache.put(key, progress.value); - } - - // Only check if key.custom (the parser) is - // non-null; else, we check twice for a single - // call to FieldCache.getXXX - if (key.custom != null && wrapper != null) { - final PrintStream infoStream = wrapper.getInfoStream(); - if (infoStream != null) { - printNewInsanity(infoStream, progress.value); - } - } - } - return progress.value; - } - } - return value; - } - - private void printNewInsanity(PrintStream infoStream, Object value) { - final FieldCacheSanityChecker.Insanity[] insanities = FieldCacheSanityChecker.checkSanity(wrapper); - for(int i=0;i= reader.numDocs()) { - // The cardinality of the BitSet is numDocs if all documents have a value. 
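The removed Cache.get() above, like its per-segment successor in SegmentFieldCacheImpl further down, uses a placeholder to avoid holding the map lock while values are built. A reduced sketch of that pattern, with names as in the patch:

  Object value;
  synchronized (readerCache) {
    value = innerCache.get(key);
    if (value == null) {
      // install a placeholder under the map lock; build outside it
      value = new FieldCache.CreationPlaceholder();
      innerCache.put(key, value);
    }
  }
  if (value instanceof FieldCache.CreationPlaceholder) {
    synchronized (value) { // serializes only builders of this one entry
      FieldCache.CreationPlaceholder progress = (FieldCache.CreationPlaceholder) value;
      if (progress.value == null) {
        progress.value = createValue(reader, key);
        synchronized (readerCache) {
          innerCache.put(key, progress.value);
        }
      }
      return progress.value;
    }
  }
  return value;

The map is locked only long enough to install the placeholder, so two threads filling caches for different fields never serialize on the expensive createValue() call.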
- // As deleted docs are not in TermDocs, this is always true - assert numSet == reader.numDocs(); - return new Bits.MatchAllBits(reader.maxDoc()); - } - return res; - } - } - // inherit javadocs public float[] getFloats (IndexReader reader, String field) throws IOException { - return getFloats(reader, field, null); + return new SlowMultiReaderWrapper(reader).getFieldCache().getFloats(field); } // inherit javadocs public float[] getFloats(IndexReader reader, String field, FloatParser parser) throws IOException { - - return (float[]) caches.get(Float.TYPE).get(reader, new Entry(field, parser)); + return new SlowMultiReaderWrapper(reader).getFieldCache().getFloats(field, parser); } - static final class FloatCache extends Cache { - FloatCache(FieldCache wrapper) { - super(wrapper); - } - - @Override - protected Object createValue(IndexReader reader, Entry entryKey) - throws IOException { - Entry entry = entryKey; - String field = entry.field; - FloatParser parser = (FloatParser) entry.custom; - if (parser == null) { - try { - return wrapper.getFloats(reader, field, DEFAULT_FLOAT_PARSER); - } catch (NumberFormatException ne) { - return wrapper.getFloats(reader, field, NUMERIC_UTILS_FLOAT_PARSER); - } - } - float[] retArray = null; - TermDocs termDocs = reader.termDocs(); - TermEnum termEnum = reader.terms (new Term (field)); - try { - do { - Term term = termEnum.term(); - if (term==null || term.field() != field) break; - float termval = parser.parseFloat(term.text()); - if (retArray == null) // late init - retArray = new float[reader.maxDoc()]; - termDocs.seek (termEnum); - while (termDocs.next()) { - retArray[termDocs.doc()] = termval; - } - } while (termEnum.next()); - } catch (StopFillCacheException stop) { - } finally { - termDocs.close(); - termEnum.close(); - } - if (retArray == null) // no values - retArray = new float[reader.maxDoc()]; - return retArray; - } - } - - public long[] getLongs(IndexReader reader, String field) throws IOException { - return getLongs(reader, field, null); + return new SlowMultiReaderWrapper(reader).getFieldCache().getLongs(field); } - + // inherit javadocs public long[] getLongs(IndexReader reader, String field, FieldCache.LongParser parser) throws IOException { - return (long[]) caches.get(Long.TYPE).get(reader, new Entry(field, parser)); + return new SlowMultiReaderWrapper(reader).getFieldCache().getLongs(field, parser); } - static final class LongCache extends Cache { - LongCache(FieldCache wrapper) { - super(wrapper); - } - - @Override - protected Object createValue(IndexReader reader, Entry entry) - throws IOException { - String field = entry.field; - FieldCache.LongParser parser = (FieldCache.LongParser) entry.custom; - if (parser == null) { - try { - return wrapper.getLongs(reader, field, DEFAULT_LONG_PARSER); - } catch (NumberFormatException ne) { - return wrapper.getLongs(reader, field, NUMERIC_UTILS_LONG_PARSER); - } - } - long[] retArray = null; - TermDocs termDocs = reader.termDocs(); - TermEnum termEnum = reader.terms (new Term(field)); - try { - do { - Term term = termEnum.term(); - if (term==null || term.field() != field) break; - long termval = parser.parseLong(term.text()); - if (retArray == null) // late init - retArray = new long[reader.maxDoc()]; - termDocs.seek (termEnum); - while (termDocs.next()) { - retArray[termDocs.doc()] = termval; - } - } while (termEnum.next()); - } catch (StopFillCacheException stop) { - } finally { - termDocs.close(); - termEnum.close(); - } - if (retArray == null) // no values - retArray = new 
long[reader.maxDoc()]; - return retArray; - } - } - // inherit javadocs public double[] getDoubles(IndexReader reader, String field) throws IOException { - return getDoubles(reader, field, null); + return new SlowMultiReaderWrapper(reader).getFieldCache().getDoubles(field); } // inherit javadocs public double[] getDoubles(IndexReader reader, String field, FieldCache.DoubleParser parser) throws IOException { - return (double[]) caches.get(Double.TYPE).get(reader, new Entry(field, parser)); + return new SlowMultiReaderWrapper(reader).getFieldCache().getDoubles(field, parser); } - static final class DoubleCache extends Cache { - DoubleCache(FieldCache wrapper) { - super(wrapper); - } - - @Override - protected Object createValue(IndexReader reader, Entry entryKey) - throws IOException { - Entry entry = entryKey; - String field = entry.field; - FieldCache.DoubleParser parser = (FieldCache.DoubleParser) entry.custom; - if (parser == null) { - try { - return wrapper.getDoubles(reader, field, DEFAULT_DOUBLE_PARSER); - } catch (NumberFormatException ne) { - return wrapper.getDoubles(reader, field, NUMERIC_UTILS_DOUBLE_PARSER); - } - } - double[] retArray = null; - TermDocs termDocs = reader.termDocs(); - TermEnum termEnum = reader.terms (new Term (field)); - try { - do { - Term term = termEnum.term(); - if (term==null || term.field() != field) break; - double termval = parser.parseDouble(term.text()); - if (retArray == null) // late init - retArray = new double[reader.maxDoc()]; - termDocs.seek (termEnum); - while (termDocs.next()) { - retArray[termDocs.doc()] = termval; - } - } while (termEnum.next()); - } catch (StopFillCacheException stop) { - } finally { - termDocs.close(); - termEnum.close(); - } - if (retArray == null) // no values - retArray = new double[reader.maxDoc()]; - return retArray; - } - } - // inherit javadocs public String[] getStrings(IndexReader reader, String field) throws IOException { - return (String[]) caches.get(String.class).get(reader, new Entry(field, (Parser)null)); + return new SlowMultiReaderWrapper(reader).getFieldCache().getStrings(field); } - static final class StringCache extends Cache { - StringCache(FieldCache wrapper) { - super(wrapper); - } - - @Override - protected Object createValue(IndexReader reader, Entry entryKey) - throws IOException { - String field = StringHelper.intern(entryKey.field); - final String[] retArray = new String[reader.maxDoc()]; - TermDocs termDocs = reader.termDocs(); - TermEnum termEnum = reader.terms (new Term (field)); - final int termCountHardLimit = reader.maxDoc(); - int termCount = 0; - try { - do { - if (termCount++ == termCountHardLimit) { - // app is misusing the API (there is more than - // one term per doc); in this case we make best - // effort to load what we can (see LUCENE-2142) - break; - } - - Term term = termEnum.term(); - if (term==null || term.field() != field) break; - String termval = term.text(); - termDocs.seek (termEnum); - while (termDocs.next()) { - retArray[termDocs.doc()] = termval; - } - } while (termEnum.next()); - } finally { - termDocs.close(); - termEnum.close(); - } - return retArray; - } - } - // inherit javadocs public StringIndex getStringIndex(IndexReader reader, String field) throws IOException { - return (StringIndex) caches.get(StringIndex.class).get(reader, new Entry(field, (Parser)null)); + return new SlowMultiReaderWrapper(reader).getFieldCache().getStringIndex(field); } - static final class StringIndexCache extends Cache { - StringIndexCache(FieldCache wrapper) { - super(wrapper); - } - - 
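The catch (StopFillCacheException) blocks in the numeric fill loops above are the escape hatch the NUMERIC_UTILS_* parsers use: when they reach a lower-precision prefix-coded term they abort the fill and the array loaded so far is returned. A sketch of a custom parser using the same mechanism; the "priceInCents" field and its "#" terminator convention are hypothetical:

  FieldCache.LongParser centsParser = new FieldCache.LongParser() {
    public long parseLong(String val) {
      if (val.startsWith("#")) {
        // hypothetical end-of-values marker: stop filling, keep what we have
        throw new StopFillCacheException();
      }
      return Long.parseLong(val);
    }
  };
  long[] cents = FieldCache.DEFAULT.getLongs(reader, "priceInCents", centsParser);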
@Override - protected Object createValue(IndexReader reader, Entry entryKey) - throws IOException { - String field = StringHelper.intern(entryKey.field); - final int[] retArray = new int[reader.maxDoc()]; - String[] mterms = new String[reader.maxDoc()+1]; - TermDocs termDocs = reader.termDocs(); - TermEnum termEnum = reader.terms (new Term (field)); - int t = 0; // current term number - - // an entry for documents that have no terms in this field - // should a document with no terms be at top or bottom? - // this puts them at the top - if it is changed, FieldDocSortedHitQueue - // needs to change as well. - mterms[t++] = null; - - try { - do { - Term term = termEnum.term(); - if (term==null || term.field() != field || t >= mterms.length) break; - - // store term text - mterms[t] = term.text(); - - termDocs.seek (termEnum); - while (termDocs.next()) { - retArray[termDocs.doc()] = t; - } - - t++; - } while (termEnum.next()); - } finally { - termDocs.close(); - termEnum.close(); - } - - if (t == 0) { - // if there are no terms, make the term array - // have a single null entry - mterms = new String[1]; - } else if (t < mterms.length) { - // if there are less terms than documents, - // trim off the dead array space - String[] terms = new String[t]; - System.arraycopy (mterms, 0, terms, 0, t); - mterms = terms; - } - - StringIndex value = new StringIndex (retArray, mterms); - return value; - } - } - - private volatile PrintStream infoStream; - public void setInfoStream(PrintStream stream) { - infoStream = stream; + SlowMultiReaderWrapper.setInfoStream(stream); } public PrintStream getInfoStream() { - return infoStream; + return SlowMultiReaderWrapper.getInfoStream(); } } Index: lucene/src/java/org/apache/lucene/search/FieldCache.java =================================================================== --- lucene/src/java/org/apache/lucene/search/FieldCache.java (revision 1175586) +++ lucene/src/java/org/apache/lucene/search/FieldCache.java (revision ) @@ -18,6 +18,7 @@ */ import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.cache.StopFillCacheException; import org.apache.lucene.util.Bits; import org.apache.lucene.util.NumericUtils; import org.apache.lucene.util.RamUsageEstimator; @@ -41,7 +42,7 @@ public interface FieldCache { public static final class CreationPlaceholder { - Object value; + public Object value; } /** Indicator for StringIndex values in the cache. */ @@ -145,6 +146,7 @@ } /** Expert: The cache used internally by sorting and range query classes. 
*/ + @Deprecated public static FieldCache DEFAULT = new FieldCacheImpl(); /** The default parser for byte values, which are encoded by {@link Byte#toString(byte)} */ @@ -239,7 +241,7 @@ public int parseInt(String val) { final int shift = val.charAt(0)-NumericUtils.SHIFT_START_INT; if (shift>0 && shift<=31) - throw new FieldCacheImpl.StopFillCacheException(); + throw new StopFillCacheException(); return NumericUtils.prefixCodedToInt(val); } protected Object readResolve() { @@ -259,7 +261,7 @@ public float parseFloat(String val) { final int shift = val.charAt(0)-NumericUtils.SHIFT_START_INT; if (shift>0 && shift<=31) - throw new FieldCacheImpl.StopFillCacheException(); + throw new StopFillCacheException(); return NumericUtils.sortableIntToFloat(NumericUtils.prefixCodedToInt(val)); } protected Object readResolve() { @@ -279,7 +281,7 @@ public long parseLong(String val) { final int shift = val.charAt(0)-NumericUtils.SHIFT_START_LONG; if (shift>0 && shift<=63) - throw new FieldCacheImpl.StopFillCacheException(); + throw new StopFillCacheException(); return NumericUtils.prefixCodedToLong(val); } protected Object readResolve() { @@ -299,7 +301,7 @@ public double parseDouble(String val) { final int shift = val.charAt(0)-NumericUtils.SHIFT_START_LONG; if (shift>0 && shift<=63) - throw new FieldCacheImpl.StopFillCacheException(); + throw new StopFillCacheException(); return NumericUtils.sortableLongToDouble(NumericUtils.prefixCodedToLong(val)); } protected Object readResolve() { @@ -316,7 +318,7 @@ * reader.maxDoc(), with turned on bits for each docid that * does have a value for this field. */ - public Bits getDocsWithField(IndexReader reader, String field) + public Bits getDocsWithField(IndexReader reader, String field) throws IOException; /** Checks the internal cache for an appropriate entry, and if none is Index: lucene/src/java/org/apache/lucene/search/cache/StopFillCacheException.java =================================================================== --- lucene/src/java/org/apache/lucene/search/cache/StopFillCacheException.java (revision ) +++ lucene/src/java/org/apache/lucene/search/cache/StopFillCacheException.java (revision ) @@ -0,0 +1,26 @@ +package org.apache.lucene.search.cache; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Hack: When thrown from a Parser (NUMERIC_UTILS_* ones), this stops + * processing terms and returns the current FieldCache + * array. 
+ */ +public class StopFillCacheException extends RuntimeException { +} Index: lucene/src/test/org/apache/lucene/index/TestSegmentFieldCacheImpl.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestSegmentFieldCacheImpl.java (revision ) +++ lucene/src/test/org/apache/lucene/index/TestSegmentFieldCacheImpl.java (revision ) @@ -0,0 +1,290 @@ +package org.apache.lucene.index; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.search.FieldCache; +import org.apache.lucene.search.cache.AtomicFieldCache; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.ReaderUtil; +import org.apache.lucene.util._TestUtil; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.PrintStream; +import java.util.*; + +/** + * + */ +public class TestSegmentFieldCacheImpl extends LuceneTestCase { + + protected IndexReader reader; + private String[] unicodeStrings; + private Directory directory; + + @Override + public void setUp() throws Exception { + super.setUp(); + int numDocs = atLeast(1000); + directory = newDirectory(); + RandomIndexWriter writer= new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); + long theLong = Long.MAX_VALUE; + double theDouble = Double.MAX_VALUE; + byte theByte = Byte.MAX_VALUE; + short theShort = Short.MAX_VALUE; + int theInt = Integer.MAX_VALUE; + float theFloat = Float.MAX_VALUE; + unicodeStrings = new String[numDocs]; + if (VERBOSE) { + System.out.println("TEST: setUp"); + } + writer.w.setInfoStream(VERBOSE ? 
System.out : null); + for (int i = 0; i < numDocs; i++){ + Document doc = new Document(); + doc.add(newField("theLong", String.valueOf(theLong--), Field.Store.NO, Field.Index.NOT_ANALYZED)); + doc.add(newField("theDouble", String.valueOf(theDouble--), Field.Store.NO, Field.Index.NOT_ANALYZED)); + doc.add(newField("theByte", String.valueOf(theByte--), Field.Store.NO, Field.Index.NOT_ANALYZED)); + doc.add(newField("theShort", String.valueOf(theShort--), Field.Store.NO, Field.Index.NOT_ANALYZED)); + doc.add(newField("theInt", String.valueOf(theInt--), Field.Store.NO, Field.Index.NOT_ANALYZED)); + doc.add(newField("theFloat", String.valueOf(theFloat--), Field.Store.NO, Field.Index.NOT_ANALYZED)); + + // sometimes skip the field: + if (random.nextInt(40) != 17) { + unicodeStrings[i] = generateString(i); + doc.add(newField("theRandomUnicodeString", unicodeStrings[i], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); + } + writer.addDocument(doc); + } + reader = writer.getReader(); + writer.close(); + } + + @Override + public void tearDown() throws Exception { + reader.close(); + directory.close(); + super.tearDown(); + } + + public void testInfoStream() throws Exception { + List subReaders = new LinkedList(); + ReaderUtil.gatherSubReaders(subReaders, reader); + AtomicFieldCache cache = subReaders.get(0).getFieldCache(); + try { + ByteArrayOutputStream bos = new ByteArrayOutputStream(1024); + cache.setInfoStream(new PrintStream(bos)); + cache.getDoubles("theDouble"); + cache.getFloats("theDouble"); + assertTrue(bos.toString().indexOf("WARNING") != -1); + } finally { + cache.purgeCache(); + } + } + + public void test() throws IOException { + List subReaders = new ArrayList(); + ReaderUtil.gatherSubReaders(subReaders, reader); + int start = 0; + for (IndexReader subReader : subReaders) { + AtomicFieldCache atomicCache = subReader.getFieldCache(); + double [] doubles = atomicCache.getDoubles("theDouble"); + assertSame("Second request to cache return same array", doubles, atomicCache.getDoubles("theDouble")); + assertSame("Second request with explicit parser return same array", doubles, atomicCache.getDoubles("theDouble", FieldCache.DEFAULT_DOUBLE_PARSER)); + int expectedLength = subReader.maxDoc(); + assertTrue("doubles Size: " + doubles.length + " is not: " + expectedLength, doubles.length == expectedLength); + for (int i = 0; i < doubles.length; i++) { + int j = start + i; + assertTrue(doubles[i] + " does not equal: " + (Double.MAX_VALUE - j), doubles[i] == (Double.MAX_VALUE - j)); + } + start += expectedLength; + } + + start = 0; + for (IndexReader subReader : subReaders) { + AtomicFieldCache atomicCache = subReader.getFieldCache(); + long [] longs = atomicCache.getLongs("theLong"); + assertSame("Second request to cache return same array", longs, atomicCache.getLongs("theLong")); + assertSame("Second request with explicit parser return same array", longs, atomicCache.getLongs("theLong", FieldCache.DEFAULT_LONG_PARSER)); + int expectedLength = subReader.maxDoc(); + assertTrue("longs Size: " + longs.length + " is not: " + expectedLength, longs.length == expectedLength); + for (int i = 0; i < longs.length; i++) { + int j = start + i; + assertTrue(longs[i] + " does not equal: " + (Long.MAX_VALUE - j) + " i=" + i, longs[i] == (Long.MAX_VALUE - j)); + } + start += expectedLength; + } + + start = 0; + for (IndexReader subReader : subReaders) { + AtomicFieldCache atomicCache = subReader.getFieldCache(); + byte [] bytes = atomicCache.getBytes("theByte"); + assertSame("Second request to cache return 
same array", bytes, atomicCache.getBytes("theByte")); + assertSame("Second request with explicit parser return same array", bytes, atomicCache.getBytes("theByte", FieldCache.DEFAULT_BYTE_PARSER)); + int expectedLength = subReader.maxDoc(); + assertTrue("bytes Size: " + bytes.length + " is not: " + expectedLength, bytes.length == expectedLength); + for (int i = 0; i < bytes.length; i++) { + int j = start + i; + assertTrue(bytes[i] + " does not equal: " + (Byte.MAX_VALUE - j), bytes[i] == (byte) (Byte.MAX_VALUE - j)); + } + start += expectedLength; + } + + start = 0; + for (IndexReader subReader : subReaders) { + AtomicFieldCache atomicCache = subReader.getFieldCache(); + short [] shorts = atomicCache.getShorts("theShort"); + assertSame("Second request to cache return same array", shorts, atomicCache.getShorts("theShort")); + assertSame("Second request with explicit parser return same array", shorts, atomicCache.getShorts("theShort", FieldCache.DEFAULT_SHORT_PARSER)); + int expectedLength = subReader.maxDoc(); + assertTrue("shorts Size: " + shorts.length + " is not: " + expectedLength, shorts.length == expectedLength); + for (int i = 0; i < shorts.length; i++) { + int j = start + i; + assertTrue(shorts[i] + " does not equal: " + (Short.MAX_VALUE - j), shorts[i] == (short) (Short.MAX_VALUE - j)); + } + start += expectedLength; + } + + start = 0; + for (IndexReader subReader : subReaders) { + AtomicFieldCache atomicCache = subReader.getFieldCache(); + int [] ints = atomicCache.getInts("theInt"); + assertSame("Second request to cache return same array", ints, atomicCache.getInts("theInt")); + assertSame("Second request with explicit parser return same array", ints, atomicCache.getInts("theInt", FieldCache.DEFAULT_INT_PARSER)); + int expectedLength = subReader.maxDoc(); + assertTrue("ints Size: " + ints.length + " is not: " + expectedLength, ints.length == expectedLength); + for (int i = 0; i < ints.length; i++) { + int j = start + i; + assertTrue(ints[i] + " does not equal: " + (Integer.MAX_VALUE - j), ints[i] == (Integer.MAX_VALUE - j)); + } + start += expectedLength; + } + + start = 0; + for (IndexReader subReader : subReaders) { + AtomicFieldCache atomicCache = subReader.getFieldCache(); + float [] floats = atomicCache.getFloats("theFloat"); + assertSame("Second request to cache return same array", floats, atomicCache.getFloats("theFloat")); + assertSame("Second request with explicit parser return same array", floats, atomicCache.getFloats("theFloat", FieldCache.DEFAULT_FLOAT_PARSER)); + int expectedLength = subReader.maxDoc(); + assertTrue("floats Size: " + floats.length + " is not: " + expectedLength, floats.length == expectedLength); + for (int i = 0; i < floats.length; i++) { + int j = start + i; + assertTrue(floats[i] + " does not equal: " + (Float.MAX_VALUE - j), floats[i] == (Float.MAX_VALUE - j)); + + } + start += expectedLength; + } + + // getTerms + start = 0; + for (IndexReader subReader : subReaders) { + AtomicFieldCache atomicCache = subReader.getFieldCache(); + String[] terms = atomicCache.getStrings("theRandomUnicodeString"); + assertSame("Second request to cache return same array", terms, atomicCache.getStrings("theRandomUnicodeString")); + int expectedLength = subReader.maxDoc(); + assertTrue("doubles Size: " + terms.length+ " is not: " + expectedLength, terms.length == expectedLength); + for (int i = 0; i < expectedLength; i++) { + int j = start + i; + final String s = terms[i]; + assertTrue("for doc " + i + ": " + s + " does not equal: " + unicodeStrings[j], 
unicodeStrings[j] == null || unicodeStrings[j].equals(s)); + } + + // test bad field + terms = atomicCache.getStrings("bogusfield"); + start += expectedLength; + } + + // getTermsIndex + start = 0; + for (IndexReader subReader : subReaders) { + AtomicFieldCache atomicCache = subReader.getFieldCache(); + FieldCache.StringIndex termsIndex = atomicCache.getStringIndex("theRandomUnicodeString"); + assertSame("Second request to cache return same array", termsIndex, atomicCache.getStringIndex("theRandomUnicodeString")); + int expectedLength = subReader.maxDoc(); + assertTrue("doubles Size: " + termsIndex.order.length + " is not: " + expectedLength, termsIndex.order.length == expectedLength); + for (int i = 0; i < expectedLength; i++) { + int j = start + i; + final String s = termsIndex.lookup[termsIndex.order[i]]; + assertTrue("for doc " + i + ": " + s + " does not equal: " + unicodeStrings[j], unicodeStrings[j] == null || unicodeStrings[j].equals(s)); + } + + int nTerms = termsIndex.lookup.length; + // System.out.println("nTerms="+nTerms); + + TermEnum tenum = subReader.terms(); + for (int i=1; i subReaders = new LinkedList(); + ReaderUtil.gatherSubReaders(subReaders, reader); + AtomicFieldCache cache = subReaders.get(0).getFieldCache(); + String[] terms = cache.getStrings("foobar"); + FieldCache.StringIndex termsIndex = cache.getStringIndex("foobar"); + writer.close(); + r.close(); + dir.close(); + } + + private String generateString(int i) { + String s = null; + if (i > 0 && random.nextInt(3) == 1) { + // reuse past string -- try to find one that's not null + for(int iter = 0; iter < 10 && s == null;iter++) { + s = unicodeStrings[random.nextInt(i)]; + } + if (s == null) { + s = _TestUtil.randomUnicodeString(random); + } + } else { + s = _TestUtil.randomUnicodeString(random); + } + return s; + } + +} Index: lucene/src/java/org/apache/lucene/index/SegmentReader.java =================================================================== --- lucene/src/java/org/apache/lucene/index/SegmentReader.java (revision 1175586) +++ lucene/src/java/org/apache/lucene/index/SegmentReader.java (revision ) @@ -28,16 +28,19 @@ import java.util.Map; import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicInteger; import org.apache.lucene.document.Document; import org.apache.lucene.document.FieldSelector; import org.apache.lucene.search.Similarity; import org.apache.lucene.index.FieldInfo.IndexOptions; +import org.apache.lucene.search.cache.AtomicFieldCache; import org.apache.lucene.store.BufferedIndexInput; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexInput; import org.apache.lucene.util.BitVector; import org.apache.lucene.util.CloseableThreadLocal; +import org.apache.lucene.util.MapBackedSet; import org.apache.lucene.util.StringHelper; /** @@ -73,6 +76,32 @@ SegmentCoreReaders core; + private final SegmentFieldCacheImpl segmentCache; + + public SegmentReader() { + this.segmentCache = new SegmentFieldCacheImpl(this); + this.readerFinishedListeners = new MapBackedSet(new ConcurrentHashMap()); + readerFinishedListeners.add(new ReaderFinishedListener() { + + public void finished(IndexReader reader) { + segmentCache.purgeCache(); + } + + }); + } + + public SegmentReader(SegmentFieldCacheImpl segmentCache) { + this.segmentCache = segmentCache; + this.readerFinishedListeners = new MapBackedSet(new ConcurrentHashMap()); + readerFinishedListeners.add(new ReaderFinishedListener() { + + public void finished(IndexReader reader) { + 
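+ // comment (not in the original patch): purge this segment's cached
+ // arrays eagerly when the reader is finished, instead of keeping them
+ // reachable until the SegmentReader itself is garbage collected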
SegmentReader.this.segmentCache.purgeCache(); + } + + }); + } + /** * Sets the initial value */ @@ -921,6 +950,11 @@ } @Override + public AtomicFieldCache getFieldCache() { + return segmentCache; + } + + @Override public int getTermInfosIndexDivisor() { return core.termsIndexDivisor; } Index: lucene/src/java/org/apache/lucene/index/IndexReader.java =================================================================== --- lucene/src/java/org/apache/lucene/index/IndexReader.java (revision 1175586) +++ lucene/src/java/org/apache/lucene/index/IndexReader.java (revision ) @@ -21,6 +21,7 @@ import org.apache.lucene.document.FieldSelector; import org.apache.lucene.search.FieldCache; // javadocs import org.apache.lucene.search.Similarity; +import org.apache.lucene.search.cache.AtomicFieldCache; import org.apache.lucene.store.*; import org.apache.lucene.util.ArrayUtil; @@ -1390,4 +1391,15 @@ public int getTermInfosIndexDivisor() { throw new UnsupportedOperationException("This reader does not support this method."); } + + /** + * Returns an {@link org.apache.lucene.search.cache.AtomicFieldCache} instance for this reader. + * Not all {@link IndexReader} subclasses implements this method. + * + * @return {@link org.apache.lucene.search.cache.AtomicFieldCache} instance for this reader + */ + public AtomicFieldCache getFieldCache() { + throw new UnsupportedOperationException("This reader does not support this method."); -} + } + +} Index: lucene/src/java/org/apache/lucene/search/cache/AtomicFieldCache.java =================================================================== --- lucene/src/java/org/apache/lucene/search/cache/AtomicFieldCache.java (revision ) +++ lucene/src/java/org/apache/lucene/search/cache/AtomicFieldCache.java (revision ) @@ -0,0 +1,228 @@ +package org.apache.lucene.search.cache; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.DocIdSet; +import org.apache.lucene.util.Bits; + +import java.io.IOException; +import java.io.PrintStream; + +import static org.apache.lucene.search.FieldCache.*; + +/** + * Expert: Maintains caches of term values. + */ +public interface AtomicFieldCache { + + /** Checks the internal cache for an appropriate entry, and if none is + * found, reads the terms in field as a single byte and returns an array + * of size reader.maxDoc() of the value each document + * has in the given field. + * @param field Which field contains the single byte values. + * @return The values in the given field for each document. + * @throws java.io.IOException If any error occurs. 
+ */ + public byte[] getBytes (String field) throws IOException; + + /** Checks the internal cache for an appropriate entry, and if none is found, + * reads the terms in field as bytes and returns an array of + * size reader.maxDoc() of the value each document has in the + * given field. + * @param field Which field contains the bytes. + * @param parser Computes byte for string values. + * @return The values in the given field for each document. + * @throws IOException If any error occurs. + */ + public byte[] getBytes (String field, ByteParser parser) throws IOException; + + /** Checks the internal cache for an appropriate entry, and if none is + * found, reads the terms in field as shorts and returns an array + * of size reader.maxDoc() of the value each document + * has in the given field. + * @param field Which field contains the shorts. + * @return The values in the given field for each document. + * @throws IOException If any error occurs. + */ + public short[] getShorts(String field) throws IOException; + + /** Checks the internal cache for an appropriate entry, and if none is found, + * reads the terms in field as shorts and returns an array of + * size reader.maxDoc() of the value each document has in the + * given field. + * @param field Which field contains the shorts. + * @param parser Computes short for string values. + * @return The values in the given field for each document. + * @throws IOException If any error occurs. + */ + public short[] getShorts(String field, ShortParser parser) throws IOException; + + + /** Checks the internal cache for an appropriate entry, and if none is + * found, reads the terms in field as integers and returns an array + * of size reader.maxDoc() of the value each document + * has in the given field. + * @param field Which field contains the integers. + * @return The values in the given field for each document. + * @throws IOException If any error occurs. + */ + public int[] getInts(String field) throws IOException; + + /** Checks the internal cache for an appropriate entry, and if none is found, + * reads the terms in field as integers and returns an array of + * size reader.maxDoc() of the value each document has in the + * given field. + * @param field Which field contains the integers. + * @param parser Computes integer for string values. + * @return The values in the given field for each document. + * @throws IOException If any error occurs. + */ + public int[] getInts(String field, IntParser parser) throws IOException; + + + /** Checks the internal cache for an appropriate entry, and if + * none is found, reads the terms in field as floats and returns an array + * of size reader.maxDoc() of the value each document + * has in the given field. + * @param field Which field contains the floats. + * @return The values in the given field for each document. + * @throws IOException If any error occurs. + */ + public float[] getFloats(String field) throws IOException; + + /** Checks the internal cache for an appropriate entry, and if + * none is found, reads the terms in field as floats and returns an array + * of size reader.maxDoc() of the value each document + * has in the given field. + * @param field Which field contains the floats. + * @param parser Computes float for string values. + * @return The values in the given field for each document. + * @throws IOException If any error occurs. 
+ */ + public float[] getFloats(String field, FloatParser parser) throws IOException; + + + /** + * Checks the internal cache for an appropriate entry, and if none is + * found, reads the terms in field as longs and returns an array + * of size reader.maxDoc() of the value each document + * has in the given field. + * + * @param field Which field contains the longs. + * @return The values in the given field for each document. + * @throws java.io.IOException If any error occurs. + */ + public long[] getLongs(String field) throws IOException; + + /** + * Checks the internal cache for an appropriate entry, and if none is found, + * reads the terms in field as longs and returns an array of + * size reader.maxDoc() of the value each document has in the + * given field. + * + * @param field Which field contains the longs. + * @param parser Computes integer for string values. + * @return The values in the given field for each document. + * @throws IOException If any error occurs. + */ + public long[] getLongs(String field, LongParser parser) throws IOException; + + + /** + * Checks the internal cache for an appropriate entry, and if none is + * found, reads the terms in field as integers and returns an array + * of size reader.maxDoc() of the value each document + * has in the given field. + * + * @param field Which field contains the doubles. + * @return The values in the given field for each document. + * @throws IOException If any error occurs. + */ + public double[] getDoubles(String field) throws IOException; + + /** + * Checks the internal cache for an appropriate entry, and if none is found, + * reads the terms in field as doubles and returns an array of + * size reader.maxDoc() of the value each document has in the + * given field. + * + * @param field Which field contains the doubles. + * @param parser Computes integer for string values. + * @return The values in the given field for each document. + * @throws IOException If any error occurs. + */ + public double[] getDoubles(String field, DoubleParser parser) throws IOException; + + /** Checks the internal cache for an appropriate entry, and if none + * is found, reads the term values in field and returns an array + * of size reader.maxDoc() containing the value each document + * has in the given field. + * @param field Which field contains the strings. + * @return The values in the given field for each document. + * @throws IOException If any error occurs. + */ + public String[] getStrings (String field) throws IOException; + + /** Checks the internal cache for an appropriate entry, and if none + * is found reads the term values in field and returns + * an array of them in natural order, along with an array telling + * which element in the term array each document uses. + * @param field Which field contains the strings. + * @return Array of terms and index into the array for each document. + * @throws IOException If any error occurs. + */ + public StringIndex getStringIndex (String field) throws IOException; + + /** + * EXPERT: Generates an array of CacheEntry objects representing all items + * currently in the FieldCache. + *
+ * NOTE: These CacheEntry objects maintain a strong reference to the + * Cached Values. Maintaining references to a CacheEntry the IndexReader + * associated with it has garbage collected will prevent the Value itself + * from being garbage collected when the Cache drops the WeakReference. + *
+ * @lucene.experimental + */ + public abstract CacheEntry[] getCacheEntries(); + + /** + * Expert: drops all cache entries associated with this + * field cache. + */ + public abstract void purgeCache(); + + /** Checks the internal cache for an appropriate entry, and if none is found, + * reads the terms in field and returns a bit set at the size of + * reader.maxDoc(), with turned on bits for each docid that + * does have a value for this field. + */ + public Bits getDocsWithField(String field) throws IOException; + + /** + * If non-null, FieldCacheImpl will warn whenever + * entries are created that are not sane according to + * {@link org.apache.lucene.util.FieldCacheSanityChecker}. + */ + public void setInfoStream(PrintStream stream); + + /** counterpart of {@link #setInfoStream(PrintStream)} */ + public PrintStream getInfoStream(); + +} Index: lucene/src/test-framework/org/apache/lucene/index/SlowMultiReaderWrapper.java =================================================================== --- lucene/src/test-framework/org/apache/lucene/index/SlowMultiReaderWrapper.java (revision 1175586) +++ lucene/src/java/org/apache/lucene/index/SlowMultiReaderWrapper.java (revision ) @@ -17,20 +17,43 @@ * limitations under the License. */ -import java.util.ArrayList; +import java.io.IOException; +import java.io.PrintStream; +import java.util.*; +import java.util.concurrent.ConcurrentHashMap; -import org.apache.lucene.util.ReaderUtil; +import org.apache.lucene.search.DocIdSet; +import org.apache.lucene.search.FieldCache; +import org.apache.lucene.search.cache.AtomicFieldCache; +import org.apache.lucene.search.cache.StopFillCacheException; +import org.apache.lucene.util.*; /** * Acts like Lucene 4.x's SlowMultiReaderWrapper for testing * of top-level MultiTermEnum, MultiTermDocs, ... + * + * @lucene.insane */ public class SlowMultiReaderWrapper extends MultiReader { + private final IndexReader reader; + private final static InsaneFieldCache insaneFieldCache = new InsaneFieldCache(); + // Same instance for every SlowMultiReaderWrapper instance works well with MapBackedSet + private final static InsaneReaderFinishedListener insaneReaderFinishedListener = new InsaneReaderFinishedListener(); + public SlowMultiReaderWrapper(IndexReader reader) { - super(subReaders(reader)); + super(reader != null ? 
subReaders(reader): new IndexReader[0]); + this.reader = reader; + if (reader == null) { + return; - } - + } + + if (this.reader.readerFinishedListeners == null) { + this.reader.readerFinishedListeners = new MapBackedSet(new ConcurrentHashMap()); + } + this.reader.addReaderFinishedListener(insaneReaderFinishedListener); + } + private static IndexReader[] subReaders(IndexReader reader) { ArrayList list = new ArrayList(); ReaderUtil.gatherSubReaders(list, reader); @@ -46,4 +69,814 @@ public String toString() { return "SlowMultiReaderWrapper(" + super.toString() + ")"; } + + public static void setInfoStream(PrintStream stream) { + insaneFieldCache.setInfoStream(stream); -} + } + + public static PrintStream getInfoStream() { + return insaneFieldCache.getInfoStream(); + } + + public static FieldCache.CacheEntry[] getCacheEntries() { + return insaneFieldCache.getCacheEntries(); + } + + public static void purgeAllCaches() { + insaneFieldCache.purgeAllCaches(); + } + + @Override + public AtomicFieldCache getFieldCache() { + return new InsaneNonAtomicFieldCache() { + + public byte[] getBytes(String field) throws IOException { + return insaneFieldCache.getBytes(reader, field); + } + + public byte[] getBytes(String field, FieldCache.ByteParser parser) throws IOException { + return insaneFieldCache.getBytes(reader, field, parser); + } + + public short[] getShorts(String field) throws IOException { + return insaneFieldCache.getShorts(reader, field); + } + + public short[] getShorts(String field, FieldCache.ShortParser parser) throws IOException { + return insaneFieldCache.getShorts(reader, field, parser); + } + + public int[] getInts(String field) throws IOException { + return insaneFieldCache.getInts(reader, field); + } + + public int[] getInts(String field, FieldCache.IntParser parser) throws IOException { + return insaneFieldCache.getInts(reader, field, parser); + } + + public float[] getFloats(String field) throws IOException { + return insaneFieldCache.getFloats(reader, field); + } + + public float[] getFloats(String field, FieldCache.FloatParser parser) throws IOException { + return insaneFieldCache.getFloats(reader, field, parser); + } + + public long[] getLongs(String field) throws IOException { + return insaneFieldCache.getLongs(reader, field); + } + + public long[] getLongs(String field, FieldCache.LongParser parser) throws IOException { + return insaneFieldCache.getLongs(reader, field, parser); + } + + public double[] getDoubles(String field) throws IOException { + return insaneFieldCache.getDoubles(reader, field); + } + + public double[] getDoubles(String field, FieldCache.DoubleParser parser) throws IOException { + return insaneFieldCache.getDoubles(reader, field, parser); + } + + public String[] getStrings(String field) throws IOException { + return insaneFieldCache.getStrings(reader, field); + } + + public FieldCache.StringIndex getStringIndex(String field) throws IOException { + return insaneFieldCache.getStringIndex(reader, field); + } + + public FieldCache.CacheEntry[] getCacheEntries() { + return insaneFieldCache.getCacheEntries(); + } + + public void purgeCache() { + insaneFieldCache.purge(reader); + } + + public void purgeAllCaches() { + insaneFieldCache.purgeAllCaches(); + } + + public Bits getDocsWithField(String field) throws IOException { + return insaneFieldCache.getDocsWithField(reader, field); + } + + public void setInfoStream(PrintStream stream) { + insaneFieldCache.setInfoStream(stream); + } + + public PrintStream getInfoStream() { + return insaneFieldCache.getInfoStream(); + } + 
}; + } + + public static InsaneNonAtomicFieldCache getNonAtomicFieldCache() { + return (InsaneNonAtomicFieldCache) new SlowMultiReaderWrapper(null).getFieldCache(); + } + + /** + * Concrete subclasses maintain field values cache for more than one reader. + * + * @lucene.insane + */ + public interface InsaneNonAtomicFieldCache extends AtomicFieldCache { + + /** + * Purges all cache entries for all IndexReader keys. + */ + public void purgeAllCaches(); + + + } + + private static class InsaneReaderFinishedListener implements ReaderFinishedListener { + + public void finished(IndexReader reader) { + insaneFieldCache.purge(reader); + } + + } + + private static class InsaneFieldCache { + + private Map, Cache> caches; + + InsaneFieldCache() { + init(); + } + + private synchronized void init() { + caches = new HashMap,Cache>(9); + caches.put(Byte.TYPE, new ByteCache(this)); + caches.put(Short.TYPE, new ShortCache(this)); + caches.put(Integer.TYPE, new IntCache(this)); + caches.put(Float.TYPE, new FloatCache(this)); + caches.put(Long.TYPE, new LongCache(this)); + caches.put(Double.TYPE, new DoubleCache(this)); + caches.put(String.class, new StringCache(this)); + caches.put(FieldCache.StringIndex.class, new StringIndexCache(this)); + caches.put(DocsWithFieldCache.class, new DocsWithFieldCache(this)); + } + + public synchronized void purgeAllCaches() { + init(); + } + + public synchronized void purge(IndexReader r) { + for (Cache c : caches.values()) { + c.purge(r); + } + } + + public synchronized FieldCache.CacheEntry[] getCacheEntries() { + List result = new ArrayList(17); + for(final Map.Entry,Cache> cacheEntry: caches.entrySet()) { + final Cache cache = cacheEntry.getValue(); + final Class cacheType = cacheEntry.getKey(); + synchronized(cache.readerCache) { + for (final Map.Entry> readerCacheEntry : cache.readerCache.entrySet()) { + final Object readerKey = readerCacheEntry.getKey(); + if (readerKey == null) continue; + final Map innerCache = readerCacheEntry.getValue(); + for (final Map.Entry mapEntry : innerCache.entrySet()) { + Entry entry = mapEntry.getKey(); + result.add(new CacheEntryImpl(readerKey, entry.field, + cacheType, entry.custom, + mapEntry.getValue())); + } + } + } + } + return result.toArray(new FieldCache.CacheEntry[result.size()]); + } + + private static final class CacheEntryImpl extends FieldCache.CacheEntry { + private final Object readerKey; + private final String fieldName; + private final Class cacheType; + private final Object custom; + private final Object value; + CacheEntryImpl(Object readerKey, String fieldName, + Class cacheType, + Object custom, + Object value) { + this.readerKey = readerKey; + this.fieldName = fieldName; + this.cacheType = cacheType; + this.custom = custom; + this.value = value; + + // :HACK: for testing. + // if (null != locale || SortField.CUSTOM != sortFieldType) { + // throw new RuntimeException("Locale/sortFieldType: " + this); + // } + + } + @Override + public Object getReaderKey() { return readerKey; } + @Override + public String getFieldName() { return fieldName; } + @Override + public Class getCacheType() { return cacheType; } + @Override + public Object getCustom() { return custom; } + @Override + public Object getValue() { return value; } + } + + final static IndexReader.ReaderFinishedListener purgeReader = new IndexReader.ReaderFinishedListener() { + + public void finished(IndexReader reader) { + insaneFieldCache.purge(reader); + } + + }; + + /** Expert: Internal cache. 
*/ + abstract static class Cache { + Cache() { + this.wrapper = null; + } + + Cache(InsaneFieldCache wrapper) { + this.wrapper = wrapper; + } + + final InsaneFieldCache wrapper; + + final Map> readerCache = new WeakHashMap>(); + + protected abstract Object createValue(IndexReader reader, Entry key) + throws IOException; + + /** Remove this reader from the cache, if present. */ + public void purge(IndexReader r) { + Object readerKey = r.getCoreCacheKey(); + synchronized(readerCache) { + readerCache.remove(readerKey); + } + } + + public Object get(IndexReader reader, Entry key) throws IOException { + Map innerCache; + Object value; + final Object readerKey = reader.getCoreCacheKey(); + synchronized (readerCache) { + innerCache = readerCache.get(readerKey); + if (innerCache == null) { + // First time this reader is using FieldCache + innerCache = new HashMap(); + readerCache.put(readerKey, innerCache); + reader.addReaderFinishedListener(purgeReader); + value = null; + } else { + value = innerCache.get(key); + } + if (value == null) { + value = new FieldCache.CreationPlaceholder(); + innerCache.put(key, value); + } + } + if (value instanceof FieldCache.CreationPlaceholder) { + synchronized (value) { + FieldCache.CreationPlaceholder progress = (FieldCache.CreationPlaceholder) value; + if (progress.value == null) { + progress.value = createValue(reader, key); + synchronized (readerCache) { + innerCache.put(key, progress.value); + } + + // Only check if key.custom (the parser) is + // non-null; else, we check twice for a single + // call to FieldCache.getXXX + if (key.custom != null && wrapper != null) { + final PrintStream infoStream = wrapper.getInfoStream(); + if (infoStream != null) { + printNewInsanity(infoStream, progress.value); + } + } + } + return progress.value; + } + } + return value; + } + + private void printNewInsanity(PrintStream infoStream, Object value) { + final FieldCacheSanityChecker.Insanity[] insanities = FieldCacheSanityChecker.checkSanity(wrapper.getCacheEntries()); + for(int i=0;i= mterms.length) break; + + // store term text + mterms[t] = term.text(); + + termDocs.seek (termEnum); + while (termDocs.next()) { + retArray[termDocs.doc()] = t; + } + + t++; + } while (termEnum.next()); + } finally { + termDocs.close(); + termEnum.close(); + } + + if (t == 0) { + // if there are no terms, make the term array + // have a single null entry + mterms = new String[1]; + } else if (t < mterms.length) { + // if there are less terms than documents, + // trim off the dead array space + String[] terms = new String[t]; + System.arraycopy (mterms, 0, terms, 0, t); + mterms = terms; + } + + FieldCache.StringIndex value = new FieldCache.StringIndex(retArray, mterms); + return value; + } + } + + static final class DocsWithFieldCache extends Cache { + + DocsWithFieldCache(InsaneFieldCache wrapper) { + super(wrapper); + } + + @Override + protected Object createValue(IndexReader reader, Entry entryKey) + throws IOException { + final Entry entry = entryKey; + final String field = entry.field; + FixedBitSet res = null; + final TermDocs termDocs = reader.termDocs(); + final TermEnum termEnum = reader.terms(new Term(field)); + try { + do { + final Term term = termEnum.term(); + if (term == null || term.field() != field) break; + if (res == null) // late init + res = new FixedBitSet(reader.maxDoc()); + termDocs.seek(termEnum); + while (termDocs.next()) { + res.set(termDocs.doc()); + } + } while (termEnum.next()); + } finally { + termDocs.close(); + termEnum.close(); + } + if (res == null) + return 
new Bits.MatchNoBits(reader.maxDoc()); + final int numSet = res.cardinality(); + if (numSet >= reader.numDocs()) { + // The cardinality of the BitSet is numDocs if all documents have a value. + // As deleted docs are not in TermDocs, this is always true + assert numSet == reader.numDocs(); + return new Bits.MatchAllBits(reader.maxDoc()); + } + return res; + } + } + + public Bits getDocsWithField(IndexReader reader, String field) throws IOException { + return (Bits) caches.get(DocsWithFieldCache.class).get(reader, new Entry(field, null)); + } + + private volatile PrintStream infoStream; + + public void setInfoStream(PrintStream stream) { + infoStream = stream; + } + + public PrintStream getInfoStream() { + return infoStream; + } + + } + +} Index: lucene/src/java/org/apache/lucene/index/SegmentFieldCacheImpl.java =================================================================== --- lucene/src/java/org/apache/lucene/index/SegmentFieldCacheImpl.java (revision ) +++ lucene/src/java/org/apache/lucene/index/SegmentFieldCacheImpl.java (revision ) @@ -0,0 +1,703 @@ +package org.apache.lucene.index; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
Index: lucene/src/java/org/apache/lucene/index/SegmentFieldCacheImpl.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/SegmentFieldCacheImpl.java	(revision )
+++ lucene/src/java/org/apache/lucene/index/SegmentFieldCacheImpl.java	(revision )
@@ -0,0 +1,703 @@
+package org.apache.lucene.index;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.search.FieldCache;
+import org.apache.lucene.search.cache.AtomicFieldCache;
+import org.apache.lucene.search.cache.StopFillCacheException;
+import org.apache.lucene.util.*;
+
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * A {@link FieldCache} over a single atomic (segment-level) reader: each
+ * instance is bound to exactly one {@link IndexReader} and caches uninverted
+ * field values for it.
+ */
+class SegmentFieldCacheImpl implements AtomicFieldCache {
+
+  private final IndexReader indexReader;
+  private final Map<Class<?>, Cache> cache;
+
+  SegmentFieldCacheImpl(IndexReader indexReader) {
+    if (indexReader == null) {
+      throw new IllegalArgumentException("Supplied indexReader cannot be null");
+    }
+
+    this.indexReader = indexReader;
+    cache = new HashMap<Class<?>, Cache>(9);
+    initCache();
+  }
+
+  private void initCache() {
+    cache.put(Byte.TYPE, new ByteCache(this, indexReader));
+    cache.put(Short.TYPE, new ShortCache(this, indexReader));
+    cache.put(Integer.TYPE, new IntCache(this, indexReader));
+    cache.put(Float.TYPE, new FloatCache(this, indexReader));
+    cache.put(Long.TYPE, new LongCache(this, indexReader));
+    cache.put(Double.TYPE, new DoubleCache(this, indexReader));
+    cache.put(String.class, new StringCache(this, indexReader));
+    cache.put(FieldCache.StringIndex.class, new StringIndexCache(this, indexReader));
+    cache.put(DocsWithFieldCache.class, new DocsWithFieldCache(this, indexReader));
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  public byte[] getBytes(String field) throws IOException {
+    return getBytes(field, null);
+  }
+
+  // inherit javadocs
+  public byte[] getBytes(String field, FieldCache.ByteParser parser) throws IOException {
+    return (byte[]) cache.get(Byte.TYPE).get(new Entry(field, parser));
+  }
+
+  // inherit javadocs
+  public short[] getShorts(String field) throws IOException {
+    return getShorts(field, null);
+  }
+
+  // inherit javadocs
+  public short[] getShorts(String field, FieldCache.ShortParser parser) throws IOException {
+    return (short[]) cache.get(Short.TYPE).get(new Entry(field, parser));
+  }
+
+  // inherit javadocs
+  public int[] getInts(String field) throws IOException {
+    return getInts(field, null);
+  }
+
+  // inherit javadocs
+  public int[] getInts(String field, FieldCache.IntParser parser) throws IOException {
+    return (int[]) cache.get(Integer.TYPE).get(new Entry(field, parser));
+  }
+
+  // inherit javadocs
+  public float[] getFloats(String field) throws IOException {
+    return getFloats(field, null);
+  }
+
+  // inherit javadocs
+  public float[] getFloats(String field, FieldCache.FloatParser parser) throws IOException {
+    return (float[]) cache.get(Float.TYPE).get(new Entry(field, parser));
+  }
+
+  // inherit javadocs
+  public long[] getLongs(String field) throws IOException {
+    return getLongs(field, null);
+  }
+
+  // inherit javadocs
+  public long[] getLongs(String field, FieldCache.LongParser parser) throws IOException {
+    return (long[]) cache.get(Long.TYPE).get(new Entry(field, parser));
+  }
+
+  // inherit javadocs
+  public double[] getDoubles(String field) throws IOException {
+    return getDoubles(field, null);
+  }
+
+  // inherit javadocs
+  public double[] getDoubles(String field, FieldCache.DoubleParser parser) throws IOException {
+    return (double[]) cache.get(Double.TYPE).get(new Entry(field, parser));
+  }
+
+  // inherit javadocs
+  public String[] getStrings(String field) throws IOException {
+    return (String[]) cache.get(String.class).get(new Entry(field, null));
+  }
+
+  // inherit javadocs
+  public FieldCache.StringIndex getStringIndex(String field)
+      throws IOException {
+    return (FieldCache.StringIndex) cache.get(FieldCache.StringIndex.class).get(new Entry(field, null));
+  }
+
+  public Bits getDocsWithField(String field)
+      throws IOException {
+    return (Bits) cache.get(DocsWithFieldCache.class).get(new Entry(field, null));
+  }
+
+  private volatile PrintStream infoStream;
+
+  public void setInfoStream(PrintStream stream) {
+    infoStream = stream;
+  }
+
+  public PrintStream getInfoStream() {
+    return infoStream;
+  }
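+
+  // Because each SegmentFieldCacheImpl is bound to a single IndexReader, the
+  // per-type caches key on Entry (field + parser) alone -- there is no outer
+  // reader-keyed WeakHashMap as in the reader-spanning cache above. The
+  // entries reported below therefore all carry this.indexReader as their
+  // reader key when handed to the FieldCacheSanityChecker.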
+
+  public FieldCache.CacheEntry[] getCacheEntries() {
+    List<FieldCache.CacheEntry> result = new ArrayList<FieldCache.CacheEntry>(17);
+    for (final Map.Entry<Class<?>, Cache> cacheEntry : cache.entrySet()) {
+      final Class<?> cacheType = cacheEntry.getKey();
+      final Cache cache = cacheEntry.getValue();
+      synchronized (cache.readerCache) {
+        for (final Map.Entry<Object, Object> mapEntry : cache.readerCache.entrySet()) {
+          Entry entry = (Entry) mapEntry.getKey();
+          result.add(new CacheEntryImpl(indexReader, entry.field, cacheType, entry.custom, mapEntry.getValue()));
+        }
+      }
+    }
+    return result.toArray(new FieldCache.CacheEntry[result.size()]);
+  }
+
+  public void purgeCache() {
+    cache.clear();
+    initCache();
+  }
+
+  static abstract class Cache {
+
+    final AtomicFieldCache wrapper;
+    private final IndexReader indexReader;
+    private final Map<Object, Object> readerCache;
+
+    Cache(AtomicFieldCache wrapper, IndexReader indexReader) {
+      this.wrapper = wrapper;
+      this.indexReader = indexReader;
+      this.readerCache = new HashMap<Object, Object>();
+    }
+
+    protected abstract Object createValue(IndexReader reader, Entry entryKey) throws IOException;
+
+    @SuppressWarnings("unchecked")
+    public Object get(Entry key) throws IOException {
+      Object value;
+
+      synchronized (readerCache) {
+        value = readerCache.get(key);
+        if (value == null) {
+          value = new FieldCache.CreationPlaceholder();
+          readerCache.put(key, value);
+        }
+      }
+      if (value instanceof FieldCache.CreationPlaceholder) {
+        synchronized (value) {
+          FieldCache.CreationPlaceholder progress = (FieldCache.CreationPlaceholder) value;
+          if (progress.value != null) {
+            return progress.value;
+          }
+          progress.value = createValue(indexReader, key);
+          synchronized (readerCache) {
+            readerCache.put(key, progress.value);
+          }
+
+          // Only check if key.custom (the parser) is
+          // non-null; else, we check twice for a single
+          // call to FieldCache.getXXX
+          if (key.custom != null && wrapper != null) {
+            final PrintStream infoStream = wrapper.getInfoStream();
+            if (infoStream != null) {
+              printNewInsanity(infoStream, progress.value);
+            }
+          }
+          return progress.value;
+        }
+      }
+
+      return value;
+    }
+
+    private void printNewInsanity(PrintStream infoStream, Object value) {
+      final FieldCacheSanityChecker.Insanity[] insanities = FieldCacheSanityChecker.checkSanity(wrapper.getCacheEntries());
+      for(int i=0;i<insanities.length;i++) {
+        final FieldCache.CacheEntry[] entries = insanities[i].getCacheEntries();
+        for(int j=0;j<entries.length;j++) {
+          if (entries[j].getValue() == value) {
+            // OK this insanity involves our entry
+            infoStream.println("WARNING: new FieldCache insanity created\nDetails: " + insanities[i].toString());
+            infoStream.println("\nStack:\n");
+            new Throwable().printStackTrace(infoStream);
+            break;
+          }
+        }
+      }
+    }
+  }
+
+  /** Expert: Every composite-key in the internal cache is of this type. */
+  static class Entry {
+    final String field;   // which Field
+    final Object custom;  // which custom comparator or parser
+
+    Entry(String field, Object custom) {
+      this.field = StringHelper.intern(field);
+      this.custom = custom;
+    }
+
+    /** Two of these are equal iff they reference the same field and type. */
+    @Override
+    public boolean equals(Object o) {
+      if (o instanceof Entry) {
+        Entry other = (Entry) o;
+        if (other.field == field) {
+          if (other.custom == null) {
+            if (custom == null) return true;
+          } else if (other.custom.equals(custom)) {
+            return true;
+          }
+        }
+      }
+      return false;
+    }
+
+    /** Composes a hashcode based on the field and type. */
+    @Override
+    public int hashCode() {
+      return field.hashCode() ^ (custom==null ? 0 : custom.hashCode());
+    }
+  }
+
+  static final class CacheEntryImpl extends FieldCache.CacheEntry {
+    private final IndexReader indexReader;
+    private final String fieldName;
+    private final Class<?> cacheType;
+    private final Object custom;
+    private final Object value;
+
+    CacheEntryImpl(IndexReader indexReader,
+                   String fieldName,
+                   Class<?> cacheType,
+                   Object custom,
+                   Object value) {
+      this.indexReader = indexReader;
+      this.fieldName = fieldName;
+      this.cacheType = cacheType;
+      this.custom = custom;
+      this.value = value;
+    }
+
+    public Object getReaderKey() {
+      return indexReader;
+    }
+
+    public String getFieldName() {
+      return fieldName;
+    }
+
+    public Class<?> getCacheType() {
+      return cacheType;
+    }
+
+    public Object getCustom() {
+      return custom;
+    }
+
+    public Object getValue() {
+      return value;
+    }
+  }
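+
+  // The per-type caches below share one createValue() shape: with no custom
+  // parser, the call is re-dispatched through the public getXXX() entry points
+  // using the default parser (falling back to the NumericUtils parser on
+  // NumberFormatException where one exists), so the result is cached under
+  // that concrete parser's Entry; otherwise the field's terms are enumerated
+  // and each term's parsed value is fanned out to its documents.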
+
+  static final class ByteCache extends Cache {
+    ByteCache(AtomicFieldCache wrapper, IndexReader indexReader) {
+      super(wrapper, indexReader);
+    }
+
+    @Override
+    protected Object createValue(IndexReader reader, Entry entryKey)
+        throws IOException {
+      Entry entry = entryKey;
+      String field = entry.field;
+      FieldCache.ByteParser parser = (FieldCache.ByteParser) entry.custom;
+      if (parser == null) {
+        return wrapper.getBytes(field, FieldCache.DEFAULT_BYTE_PARSER);
+      }
+      final byte[] retArray = new byte[reader.maxDoc()];
+      TermDocs termDocs = reader.termDocs();
+      TermEnum termEnum = reader.terms (new Term (field));
+      try {
+        do {
+          Term term = termEnum.term();
+          if (term==null || term.field() != field) break;
+          byte termval = parser.parseByte(term.text());
+          termDocs.seek (termEnum);
+          while (termDocs.next()) {
+            retArray[termDocs.doc()] = termval;
+          }
+        } while (termEnum.next());
+      } catch (StopFillCacheException stop) {
+      } finally {
+        termDocs.close();
+        termEnum.close();
+      }
+      return retArray;
+    }
+  }
+
+  static final class ShortCache extends Cache {
+    ShortCache(AtomicFieldCache wrapper, IndexReader indexReader) {
+      super(wrapper, indexReader);
+    }
+
+    @Override
+    protected Object createValue(IndexReader reader, Entry entryKey)
+        throws IOException {
+      Entry entry = entryKey;
+      String field = entry.field;
+      FieldCache.ShortParser parser = (FieldCache.ShortParser) entry.custom;
+      if (parser == null) {
+        return wrapper.getShorts(field, FieldCache.DEFAULT_SHORT_PARSER);
+      }
+      final short[] retArray = new short[reader.maxDoc()];
+      TermDocs termDocs = reader.termDocs();
+      TermEnum termEnum = reader.terms (new Term (field));
+      try {
+        do {
+          Term term = termEnum.term();
+          if (term==null || term.field() != field) break;
+          short termval = parser.parseShort(term.text());
+          termDocs.seek (termEnum);
+          while (termDocs.next()) {
+            retArray[termDocs.doc()] = termval;
+          }
+        } while (termEnum.next());
+      } catch (StopFillCacheException stop) {
+      } finally {
+        termDocs.close();
+        termEnum.close();
+      }
+      return retArray;
+    }
+  }
+
+  static final class IntCache extends Cache {
+
+    IntCache(AtomicFieldCache wrapper, IndexReader indexReader) {
+      super(wrapper, indexReader);
+    }
+
+    @Override
+    protected Object createValue(IndexReader reader, Entry entryKey)
+        throws IOException {
+      Entry entry = entryKey;
+      String field = entry.field;
+      FieldCache.IntParser parser = (FieldCache.IntParser) entry.custom;
+      if (parser == null) {
+        try {
+          return wrapper.getInts(field, FieldCache.DEFAULT_INT_PARSER);
+        } catch (NumberFormatException ne) {
+          return wrapper.getInts(field, FieldCache.NUMERIC_UTILS_INT_PARSER);
+        }
+      }
+      int[] retArray = null;
+      TermDocs termDocs = reader.termDocs();
+      TermEnum termEnum = reader.terms (new Term (field));
+      try {
+        do {
+          Term term = termEnum.term();
+          if (term==null || term.field() != field) break;
+          int termval = parser.parseInt(term.text());
+          if (retArray == null) // late init
+            retArray = new int[reader.maxDoc()];
+          termDocs.seek (termEnum);
+          while (termDocs.next()) {
+            retArray[termDocs.doc()] = termval;
+          }
+        } while (termEnum.next());
+      } catch (StopFillCacheException stop) {
+      } finally {
+        termDocs.close();
+        termEnum.close();
+      }
+      if (retArray == null) // no values
+        retArray = new int[reader.maxDoc()];
+      return retArray;
+    }
+  }
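+
+  // StopFillCacheException (imported from org.apache.lucene.search.cache) is
+  // thrown by the NUMERIC_UTILS_* parsers once they run past the valid terms
+  // of a numeric field into its lower-precision prefix terms; the empty catch
+  // blocks above and below deliberately swallow it and return the values
+  // collected so far.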
+
+  static final class FloatCache extends Cache {
+
+    FloatCache(AtomicFieldCache wrapper, IndexReader indexReader) {
+      super(wrapper, indexReader);
+    }
+
+    @Override
+    protected Object createValue(IndexReader reader, Entry entryKey)
+        throws IOException {
+      Entry entry = entryKey;
+      String field = entry.field;
+      FieldCache.FloatParser parser = (FieldCache.FloatParser) entry.custom;
+      if (parser == null) {
+        try {
+          return wrapper.getFloats(field, FieldCache.DEFAULT_FLOAT_PARSER);
+        } catch (NumberFormatException ne) {
+          return wrapper.getFloats(field, FieldCache.NUMERIC_UTILS_FLOAT_PARSER);
+        }
+      }
+      float[] retArray = null;
+      TermDocs termDocs = reader.termDocs();
+      TermEnum termEnum = reader.terms (new Term (field));
+      try {
+        do {
+          Term term = termEnum.term();
+          if (term==null || term.field() != field) break;
+          float termval = parser.parseFloat(term.text());
+          if (retArray == null) // late init
+            retArray = new float[reader.maxDoc()];
+          termDocs.seek (termEnum);
+          while (termDocs.next()) {
+            retArray[termDocs.doc()] = termval;
+          }
+        } while (termEnum.next());
+      } catch (StopFillCacheException stop) {
+      } finally {
+        termDocs.close();
+        termEnum.close();
+      }
+      if (retArray == null) // no values
+        retArray = new float[reader.maxDoc()];
+      return retArray;
+    }
+  }
+
+  static final class LongCache extends Cache {
+
+    LongCache(AtomicFieldCache wrapper, IndexReader indexReader) {
+      super(wrapper, indexReader);
+    }
+
+    @Override
+    protected Object createValue(IndexReader reader, Entry entry)
+        throws IOException {
+      String field = entry.field;
+      FieldCache.LongParser parser = (FieldCache.LongParser) entry.custom;
+      if (parser == null) {
+        try {
+          return wrapper.getLongs(field, FieldCache.DEFAULT_LONG_PARSER);
+        } catch (NumberFormatException ne) {
+          return wrapper.getLongs(field, FieldCache.NUMERIC_UTILS_LONG_PARSER);
+        }
+      }
+      long[] retArray = null;
+      TermDocs termDocs = reader.termDocs();
+      TermEnum termEnum = reader.terms (new Term(field));
+      try {
+        do {
+          Term term = termEnum.term();
+          if (term==null || term.field() != field) break;
+          long termval = parser.parseLong(term.text());
+          if (retArray == null) // late init
+            retArray = new long[reader.maxDoc()];
+          termDocs.seek (termEnum);
+          while (termDocs.next()) {
+            retArray[termDocs.doc()] = termval;
+          }
+        } while (termEnum.next());
+      } catch (StopFillCacheException stop) {
+      } finally {
+        termDocs.close();
+        termEnum.close();
+      }
+      if (retArray == null) // no values
+        retArray = new long[reader.maxDoc()];
+      return retArray;
+    }
+  }
+
+  static final class DoubleCache extends Cache {
+
+    DoubleCache(AtomicFieldCache wrapper, IndexReader indexReader) {
+      super(wrapper, indexReader);
+    }
+
+    @Override
+    protected Object createValue(IndexReader reader, Entry entryKey)
+        throws IOException {
+      Entry entry = entryKey;
+      String field = entry.field;
+      FieldCache.DoubleParser parser = (FieldCache.DoubleParser) entry.custom;
+      if (parser == null) {
+        try {
+          return wrapper.getDoubles(field, FieldCache.DEFAULT_DOUBLE_PARSER);
+        } catch (NumberFormatException ne) {
+          return wrapper.getDoubles(field, FieldCache.NUMERIC_UTILS_DOUBLE_PARSER);
+        }
+      }
+      double[] retArray = null;
+      TermDocs termDocs = reader.termDocs();
+      TermEnum termEnum = reader.terms (new Term (field));
+      try {
+        do {
+          Term term = termEnum.term();
+          if (term==null || term.field() != field) break;
+          double termval = parser.parseDouble(term.text());
+          if (retArray == null) // late init
+            retArray = new double[reader.maxDoc()];
+          termDocs.seek (termEnum);
+          while (termDocs.next()) {
+            retArray[termDocs.doc()] = termval;
+          }
+        } while (termEnum.next());
+      } catch (StopFillCacheException stop) {
+      } finally {
+        termDocs.close();
+        termEnum.close();
+      }
+      if (retArray == null) // no values
+        retArray = new double[reader.maxDoc()];
+      return retArray;
+    }
+  }
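+
+  // Two degenerate cases of getDocsWithField() are collapsed into constant
+  // Bits implementations below: a field with no terms at all yields
+  // MatchNoBits, and a bit count equal to numDocs() (every live doc has a
+  // value) yields MatchAllBits, so both common cases skip the per-document
+  // FixedBitSet lookup entirely.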
+
+  static final class DocsWithFieldCache extends Cache {
+    DocsWithFieldCache(AtomicFieldCache wrapper, IndexReader indexReader) {
+      super(wrapper, indexReader);
+    }
+
+    @Override
+    protected Object createValue(IndexReader reader, Entry entryKey)
+        throws IOException {
+      final Entry entry = entryKey;
+      final String field = entry.field;
+      FixedBitSet res = null;
+      final TermDocs termDocs = reader.termDocs();
+      final TermEnum termEnum = reader.terms(new Term(field));
+      try {
+        do {
+          final Term term = termEnum.term();
+          if (term == null || term.field() != field) break;
+          if (res == null) // late init
+            res = new FixedBitSet(reader.maxDoc());
+          termDocs.seek(termEnum);
+          while (termDocs.next()) {
+            res.set(termDocs.doc());
+          }
+        } while (termEnum.next());
+      } finally {
+        termDocs.close();
+        termEnum.close();
+      }
+      if (res == null)
+        return new Bits.MatchNoBits(reader.maxDoc());
+      final int numSet = res.cardinality();
+      if (numSet >= reader.numDocs()) {
+        // The cardinality of the BitSet is numDocs if all documents have a value.
+        // As deleted docs are not in TermDocs, this is always true
+        assert numSet == reader.numDocs();
+        return new Bits.MatchAllBits(reader.maxDoc());
+      }
+      return res;
+    }
+  }
+
+  static final class StringCache extends Cache {
+
+    StringCache(AtomicFieldCache wrapper, IndexReader indexReader) {
+      super(wrapper, indexReader);
+    }
+
+    @Override
+    protected Object createValue(IndexReader reader, Entry entryKey)
+        throws IOException {
+      String field = StringHelper.intern(entryKey.field);
+      final String[] retArray = new String[reader.maxDoc()];
+      TermDocs termDocs = reader.termDocs();
+      TermEnum termEnum = reader.terms (new Term (field));
+      final int termCountHardLimit = reader.maxDoc();
+      int termCount = 0;
+      try {
+        do {
+          if (termCount++ == termCountHardLimit) {
+            // app is misusing the API (there is more than
+            // one term per doc); in this case we make best
+            // effort to load what we can (see LUCENE-2142)
+            break;
+          }
+
+          Term term = termEnum.term();
+          if (term==null || term.field() != field) break;
+          String termval = term.text();
+          termDocs.seek (termEnum);
+          while (termDocs.next()) {
+            retArray[termDocs.doc()] = termval;
+          }
+        } while (termEnum.next());
+      } finally {
+        termDocs.close();
+        termEnum.close();
+      }
+      return retArray;
+    }
+  }
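+
+  // StringIndexCache builds a FieldCache.StringIndex: order[doc] is an ordinal
+  // into lookup[], with ordinal 0 reserved (lookup[0] == null) for documents
+  // that have no term in the field. E.g. three docs with values "b", (missing),
+  // "a" yield lookup = {null, "a", "b"} and order = {2, 0, 1}.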
+
+  static final class StringIndexCache extends Cache {
+    StringIndexCache(AtomicFieldCache wrapper, IndexReader indexReader) {
+      super(wrapper, indexReader);
+    }
+
+    @Override
+    protected Object createValue(IndexReader reader, Entry entryKey)
+        throws IOException {
+      String field = StringHelper.intern(entryKey.field);
+      final int[] retArray = new int[reader.maxDoc()];
+      String[] mterms = new String[reader.maxDoc()+1];
+      TermDocs termDocs = reader.termDocs();
+      TermEnum termEnum = reader.terms (new Term (field));
+      int t = 0;  // current term number
+
+      // an entry for documents that have no terms in this field
+      // should a document with no terms be at top or bottom?
+      // this puts them at the top - if it is changed, FieldDocSortedHitQueue
+      // needs to change as well.
+      mterms[t++] = null;
+
+      try {
+        do {
+          Term term = termEnum.term();
+          if (term==null || term.field() != field || t >= mterms.length) break;
+
+          // store term text
+          mterms[t] = term.text();
+
+          termDocs.seek (termEnum);
+          while (termDocs.next()) {
+            retArray[termDocs.doc()] = t;
+          }
+
+          t++;
+        } while (termEnum.next());
+      } finally {
+        termDocs.close();
+        termEnum.close();
+      }
+
+      if (t == 0) {
+        // if there are no terms, make the term array
+        // have a single null entry
+        mterms = new String[1];
+      } else if (t < mterms.length) {
+        // if there are less terms than documents,
+        // trim off the dead array space
+        String[] terms = new String[t];
+        System.arraycopy (mterms, 0, terms, 0, t);
+        mterms = terms;
+      }
+
+      FieldCache.StringIndex value = new FieldCache.StringIndex(retArray, mterms);
+      return value;
+    }
+  }
+
+}
\ No newline at end of file
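
[Illustrative aside, not itself part of the diff] A sketch of consuming the
string index that this last cache produces, again assuming getFieldCache()
from the hunks above returns an AtomicFieldCache over an already-open reader;
the "category" field is invented:

    AtomicFieldCache fieldCache = new SlowMultiReaderWrapper(reader).getFieldCache();
    FieldCache.StringIndex idx = fieldCache.getStringIndex("category");
    for (int doc = 0; doc < reader.maxDoc(); doc++) {
      int ord = idx.order[doc];            // 0 when the doc has no term
      String category = idx.lookup[ord];   // null for ord 0
      // ordinals compare in term-sort order, so sorting can compare ints
    }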