Index: lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java =================================================================== --- lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java (revision 1141501) +++ lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java (revision ) @@ -17,32 +17,16 @@ * limitations under the License. */ -import java.io.IOException; -import java.io.PrintStream; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.WeakHashMap; - +import org.apache.lucene.index.DocTermOrds; import org.apache.lucene.index.IndexReader; -import org.apache.lucene.search.cache.ByteValuesCreator; -import org.apache.lucene.search.cache.DocTermsCreator; -import org.apache.lucene.search.cache.DocTermsIndexCreator; -import org.apache.lucene.search.cache.DoubleValuesCreator; -import org.apache.lucene.search.cache.EntryCreator; -import org.apache.lucene.search.cache.FloatValuesCreator; -import org.apache.lucene.search.cache.IntValuesCreator; -import org.apache.lucene.search.cache.LongValuesCreator; -import org.apache.lucene.search.cache.ShortValuesCreator; -import org.apache.lucene.search.cache.CachedArray.ByteValues; -import org.apache.lucene.search.cache.CachedArray.DoubleValues; -import org.apache.lucene.search.cache.CachedArray.FloatValues; -import org.apache.lucene.search.cache.CachedArray.IntValues; -import org.apache.lucene.search.cache.CachedArray.LongValues; -import org.apache.lucene.search.cache.CachedArray.ShortValues; +import org.apache.lucene.search.cache.*; +import org.apache.lucene.search.cache.CachedArray.*; import org.apache.lucene.util.FieldCacheSanityChecker; +import java.io.IOException; +import java.io.PrintStream; +import java.util.*; + /** * Expert: The default cache implementation, storing all values in memory. * A WeakHashMap is used for storage. 
@@ -70,6 +54,7 @@ caches.put(Double.TYPE, new Cache(this)); caches.put(DocTermsIndex.class, new Cache(this)); caches.put(DocTerms.class, new Cache(this)); + caches.put(DocTermOrds.class, new Cache(this)); } public synchronized void purgeAllCaches() { @@ -393,6 +378,11 @@ return (DocTerms)caches.get(DocTerms.class).get(reader, new Entry(field, creator)); } + @SuppressWarnings("unchecked") + public DocTermOrds getDocTermOrds(IndexReader reader, String field) throws IOException { + return (DocTermOrds) caches.get(DocTermOrds.class).get(reader, new Entry(field, new DocTermOrdsCreator(field, 0))); + } + private volatile PrintStream infoStream; public void setInfoStream(PrintStream stream) { Index: lucene/src/java/org/apache/lucene/search/FieldCache.java =================================================================== --- lucene/src/java/org/apache/lucene/search/FieldCache.java (revision 1068526) +++ lucene/src/java/org/apache/lucene/search/FieldCache.java (revision ) @@ -17,6 +17,7 @@ * limitations under the License. */ +import org.apache.lucene.index.DocTermOrds; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.cache.EntryCreator; @@ -654,6 +655,18 @@ throws IOException; /** + * Checks the internal cache for an appropriate entry, and if none is found, reads the term values + * in field and returns a {@link DocTermOrds} instance, providing a method to retrieve + * the term (as a BytesRef) per document. + * + * @param reader Used to build a {@link DocTermOrds} instance + * @param field Which field contains the strings. + * @return a {@link DocTermOrds} instance + * @throws IOException If any error occurs. + */ + public DocTermOrds getDocTermOrds(IndexReader reader, String field) throws IOException; + + /** * EXPERT: A unique Identifier/Description for each item in the FieldCache. * Can be useful for logging/debugging. 
* @lucene.experimental Index: lucene/src/java/org/apache/lucene/search/cache/DocTermOrdsCreator.java =================================================================== --- lucene/src/java/org/apache/lucene/search/cache/DocTermOrdsCreator.java (revision ) +++ lucene/src/java/org/apache/lucene/search/cache/DocTermOrdsCreator.java (revision ) @@ -0,0 +1,51 @@ +package org.apache.lucene.search.cache; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.DocTermOrds; +import org.apache.lucene.index.IndexReader; + +import java.io.IOException; + +/** + * Creates {@link DocTermOrds} instances. 
+ */ +public class DocTermOrdsCreator extends EntryCreatorWithOptions<DocTermOrds> { + + private final String field; + + public DocTermOrdsCreator(String field, int flag) { + super(flag); + this.field = field; + } + + @Override + public DocTermOrds create(IndexReader reader) throws IOException { + return new DocTermOrds(reader, field); + } + + @Override + public DocTermOrds validate(DocTermOrds entry, IndexReader reader) throws IOException { + return entry; + } + + @Override + public EntryKey getCacheKey() { + return new SimpleEntryKey(DocTermOrdsCreator.class, field); + } +} Index: lucene/src/test/org/apache/lucene/search/TestFieldCache.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestFieldCache.java (revision 1145730) +++ lucene/src/test/org/apache/lucene/search/TestFieldCache.java (revision ) @@ -19,28 +19,33 @@ import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.RandomIndexWriter; -import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.index.*; import org.apache.lucene.store.Directory; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util._TestUtil; -import org.apache.lucene.util.BytesRef; -import java.io.IOException; + import java.io.ByteArrayOutputStream; +import java.io.IOException; import java.io.PrintStream; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.LinkedHashSet; +import java.util.List; public class TestFieldCache extends LuceneTestCase { protected IndexReader reader; private int NUM_DOCS; + private int NUM_ORDS; private String[] unicodeStrings; + private BytesRef[][] multiValued; private Directory directory; @Override public void setUp() throws Exception { super.setUp(); NUM_DOCS = 
atLeast(1000); + NUM_ORDS = atLeast(2); directory = newDirectory(); RandomIndexWriter writer= new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); long theLong = Long.MAX_VALUE; @@ -50,6 +55,7 @@ int theInt = Integer.MAX_VALUE; float theFloat = Float.MAX_VALUE; unicodeStrings = new String[NUM_DOCS]; + multiValued = new BytesRef[NUM_DOCS][NUM_ORDS]; if (VERBOSE) { System.out.println("TEST: setUp"); } @@ -65,21 +71,19 @@ // sometimes skip the field: if (random.nextInt(40) != 17) { - String s = null; - if (i > 0 && random.nextInt(3) == 1) { - // reuse past string -- try to find one that's not null - for(int iter=0;iter<10 && s==null;iter++) { - s = unicodeStrings[random.nextInt(i)]; + unicodeStrings[i] = generateString(i); + doc.add(newField("theRandomUnicodeString", unicodeStrings[i], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); - } + } - if (s == null) { - s = _TestUtil.randomUnicodeString(random, 250); + + // sometimes skip the field: + if (random.nextInt(10) != 8) { + for (int j = 0; j < NUM_ORDS; j++) { + String newValue = generateString(i); + multiValued[i][j] = new BytesRef(newValue); + doc.add(newField("theRandomUnicodeMultiValuedField", newValue, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); - } + } - } else { - s = _TestUtil.randomUnicodeString(random, 250); + Arrays.sort(multiValued[i]); - } + } - unicodeStrings[i] = s; - doc.add(newField("theRandomUnicodeString", unicodeStrings[i], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); - } writer.addDocument(doc); } reader = writer.getReader(); @@ -210,6 +214,47 @@ // test bad field terms = cache.getTerms(reader, "bogusfield"); + // getDocTermOrds + DocTermOrds termOrds = cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField"); + TermsEnum termsEnum = termOrds.getOrdTermsEnum(reader); + assertSame("Second request to cache return same DocTermOrds", termOrds, cache.getDocTermOrds(reader, 
"theRandomUnicodeMultiValuedField")); + DocTermOrds.TermOrdsIterator reuse = null; + for (int i = 0; i < NUM_DOCS; i++) { + reuse = termOrds.lookup(i, reuse); + final int[] buffer = new int[5]; + // This will remove identical terms. A DocTermOrds doesn't return duplicate ords for a docId + List values = new ArrayList(new LinkedHashSet(Arrays.asList(multiValued[i]))); + for (;;) { + int chunk = reuse.read(buffer); + if (chunk == 0) { + for (int ord = 0; ord < values.size(); ord++) { + BytesRef term = values.get(ord); + assertNull(String.format("Document[%d] misses field must be null. Has value %s for ord %d", i, term, ord), term); + } + break; + } + + for(int idx=0; idx < chunk; idx++) { + int key = buffer[idx]; + termsEnum.seekExact((long) key); + String actual = termsEnum.term().utf8ToString(); + String expected = values.get(idx).utf8ToString(); + if (!expected.equals(actual)) { + reuse = termOrds.lookup(i, reuse); + reuse.read(buffer); + } + assertTrue(String.format("Expected value %s for doc %d and ord %d, but was %s", expected, i, idx, actual), expected.equals(actual)); + } + + if (chunk < buffer.length) { + break; + } + } + } + + // test bad field + termOrds = cache.getDocTermOrds(reader, "bogusfield"); + FieldCache.DEFAULT.purge(reader); } @@ -223,4 +268,21 @@ r.close(); dir.close(); } + + private String generateString(int i) { + String s = null; + if (i > 0 && random.nextInt(3) == 1) { + // reuse past string -- try to find one that's not null + for(int iter = 0; iter < 10 && s == null;iter++) { + s = unicodeStrings[random.nextInt(i)]; -} + } + if (s == null) { + s = _TestUtil.randomUnicodeString(random, 250); + } + } else { + s = _TestUtil.randomUnicodeString(random, 250); + } + return s; + } + +}