Index: solr/src/test/org/apache/solr/search/TestIndexSearcher.java =================================================================== --- solr/src/test/org/apache/solr/search/TestIndexSearcher.java (revision 947841) +++ solr/src/test/org/apache/solr/search/TestIndexSearcher.java (working copy) @@ -80,9 +80,6 @@ // Didn't work w/ older versions of lucene2.9 going from segment -> multi assertEquals(r1.getLeafReaders()[0], r2.getLeafReaders()[0]); - // make sure the String returned is the exact same instance (i.e. same FieldCache instance) - assertTrue(sval1 == getStringVal(sr2,"v_s",0)); - assertU(adoc("id","5", "v_f","3.14159")); assertU(adoc("id","6", "v_f","8983", "v_s","string6")); assertU(commit()); @@ -129,8 +126,6 @@ SolrIndexReader r6 = sr4.getSearcher().getReader(); assertEquals(1, r6.getLeafReaders()[0].numDocs()); // only a single doc left in the first segment assertTrue( !r5.getLeafReaders()[0].equals(r6.getLeafReaders()[0]) ); // readers now different - String afterDelete = getStringVal(sr6, "v_s",1); - assertTrue( beforeDelete == afterDelete ); // same field cache is used even though deletions are different sr5.close(); sr6.close(); Index: solr/src/test/org/apache/solr/search/TestSort.java =================================================================== --- solr/src/test/org/apache/solr/search/TestSort.java (revision 947841) +++ solr/src/test/org/apache/solr/search/TestSort.java (working copy) @@ -25,6 +25,7 @@ import org.apache.lucene.search.*; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.util.OpenBitSet; +import org.apache.lucene.util.BytesRef; import org.apache.solr.util.AbstractSolrTestCase; import java.io.IOException; @@ -174,7 +175,7 @@ ScoreDoc[] sdocs = topDocs.scoreDocs; for (int j=0; j map = comp.getElevationMap(reader, core); - assertTrue( map.get( "aaa" ).priority.containsKey( "A" ) ); + assertTrue( map.get( "aaa" ).priority.containsKey( new BytesRef("A") ) ); assertNull( map.get( "bbb" ) ); // now change the file @@ -258,6 +259,6 @@ reader = core.getSearcher().get().getReader(); map = comp.getElevationMap(reader, core); assertNull( map.get( "aaa" ) ); - assertTrue( map.get( "bbb" ).priority.containsKey( "B" ) ); + assertTrue( map.get( "bbb" ).priority.containsKey( new BytesRef("B") ) ); } } Index: solr/src/java/org/apache/solr/schema/SortableDoubleField.java =================================================================== --- solr/src/java/org/apache/solr/schema/SortableDoubleField.java (revision 947841) +++ solr/src/java/org/apache/solr/schema/SortableDoubleField.java (working copy) @@ -24,6 +24,7 @@ import org.apache.solr.search.function.StringIndexDocValues; import org.apache.lucene.document.Fieldable; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.util.BytesRef; import org.apache.solr.util.NumberUtils; import org.apache.solr.response.TextResponseWriter; import org.apache.solr.response.XMLWriter; @@ -113,8 +114,8 @@ } public double doubleVal(int doc) { - int ord=order[doc]; - return ord==0 ? def : NumberUtils.SortableStr2double(lookup[ord]); + int ord=termsIndex.getOrd(doc); + return ord==0 ? 
def : NumberUtils.SortableStr2double(termsIndex.get(ord, new BytesRef()).utf8ToString()); } public String strVal(int doc) { Index: solr/src/java/org/apache/solr/schema/StrField.java =================================================================== --- solr/src/java/org/apache/solr/schema/StrField.java (revision 947841) +++ solr/src/java/org/apache/solr/schema/StrField.java (working copy) @@ -20,6 +20,7 @@ import org.apache.lucene.search.SortField; import org.apache.lucene.document.Fieldable; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.util.BytesRef; import org.apache.solr.response.TextResponseWriter; import org.apache.solr.response.XMLWriter; import org.apache.solr.search.function.ValueSource; @@ -77,7 +78,7 @@ } public int intVal(int doc) { - int ord=order[doc]; + int ord=termsIndex.getOrd(doc); return ord; } @@ -90,8 +91,12 @@ } public String strVal(int doc) { - int ord=order[doc]; - return lookup[ord]; + int ord=termsIndex.getOrd(doc); + if (ord == 0) { + return null; + } else { + return termsIndex.get(ord, new BytesRef()).utf8ToString(); + } } public String toString(int doc) { Index: solr/src/java/org/apache/solr/schema/DateField.java =================================================================== --- solr/src/java/org/apache/solr/schema/DateField.java (revision 947841) +++ solr/src/java/org/apache/solr/schema/DateField.java (working copy) @@ -22,6 +22,7 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.SortField; import org.apache.lucene.search.TermRangeQuery; +import org.apache.lucene.util.BytesRef; import org.apache.solr.common.SolrException; import org.apache.solr.common.util.DateUtil; import org.apache.solr.request.SolrQueryRequest; @@ -434,7 +435,7 @@ } public int intVal(int doc) { - int ord=order[doc]; + int ord=termsIndex.getOrd(doc); return ord; } @@ -447,8 +448,9 @@ } public String strVal(int doc) { - int ord=order[doc]; - return ft.indexedToReadable(lookup[ord]); + int ord=termsIndex.getOrd(doc); + // nocommit -- pass BytesRef directly? + return ft.indexedToReadable(termsIndex.get(ord, new BytesRef()).utf8ToString()); } public String toString(int doc) { Index: solr/src/java/org/apache/solr/schema/SortableLongField.java =================================================================== --- solr/src/java/org/apache/solr/schema/SortableLongField.java (revision 947841) +++ solr/src/java/org/apache/solr/schema/SortableLongField.java (working copy) @@ -24,6 +24,7 @@ import org.apache.solr.search.function.StringIndexDocValues; import org.apache.lucene.document.Fieldable; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.util.BytesRef; import org.apache.solr.util.NumberUtils; import org.apache.solr.response.TextResponseWriter; import org.apache.solr.response.XMLWriter; @@ -110,8 +111,8 @@ } public long longVal(int doc) { - int ord=order[doc]; - return ord==0 ? def : NumberUtils.SortableStr2long(lookup[ord],0,5); + int ord=termsIndex.getOrd(doc); + return ord==0 ? 
def : NumberUtils.SortableStr2long(termsIndex.get(ord, new BytesRef()).utf8ToString(),0,5); } public double doubleVal(int doc) { Index: solr/src/java/org/apache/solr/schema/SortableFloatField.java =================================================================== --- solr/src/java/org/apache/solr/schema/SortableFloatField.java (revision 947841) +++ solr/src/java/org/apache/solr/schema/SortableFloatField.java (working copy) @@ -24,6 +24,7 @@ import org.apache.solr.search.function.StringIndexDocValues; import org.apache.lucene.document.Fieldable; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.util.BytesRef; import org.apache.solr.util.NumberUtils; import org.apache.solr.response.TextResponseWriter; import org.apache.solr.response.XMLWriter; @@ -101,8 +102,8 @@ } public float floatVal(int doc) { - int ord=order[doc]; - return ord==0 ? def : NumberUtils.SortableStr2float(lookup[ord]); + int ord=termsIndex.getOrd(doc); + return ord==0 ? def : NumberUtils.SortableStr2float(termsIndex.get(ord, new BytesRef()).utf8ToString()); } public int intVal(int doc) { Index: solr/src/java/org/apache/solr/schema/FieldType.java =================================================================== --- solr/src/java/org/apache/solr/schema/FieldType.java (revision 947841) +++ solr/src/java/org/apache/solr/schema/FieldType.java (working copy) @@ -342,6 +342,7 @@ return toExternal(f); // by default use the string } + // nocommit -- should we add variant that takes/returns BytesRef? /** Given an indexed term, return the human readable representation */ public String indexedToReadable(String indexedForm) { return indexedForm; Index: solr/src/java/org/apache/solr/schema/SortableIntField.java =================================================================== --- solr/src/java/org/apache/solr/schema/SortableIntField.java (revision 947841) +++ solr/src/java/org/apache/solr/schema/SortableIntField.java (working copy) @@ -24,6 +24,7 @@ import org.apache.solr.search.function.StringIndexDocValues; import org.apache.lucene.document.Fieldable; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.util.BytesRef; import org.apache.solr.util.NumberUtils; import org.apache.solr.response.TextResponseWriter; import org.apache.solr.response.XMLWriter; @@ -109,8 +110,8 @@ } public int intVal(int doc) { - int ord=order[doc]; - return ord==0 ? def : NumberUtils.SortableStr2int(lookup[ord],0,3); + int ord=termsIndex.getOrd(doc); + return ord==0 ? 
def : NumberUtils.SortableStr2int(termsIndex.get(ord, new BytesRef()).utf8ToString(),0,3); } public long longVal(int doc) { Index: solr/src/java/org/apache/solr/search/function/StringIndexDocValues.java =================================================================== --- solr/src/java/org/apache/solr/search/function/StringIndexDocValues.java (revision 947841) +++ solr/src/java/org/apache/solr/search/function/StringIndexDocValues.java (working copy) @@ -19,6 +19,7 @@ import org.apache.lucene.search.FieldCache; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.util.BytesRef; import java.io.IOException; @@ -26,33 +27,31 @@ * Serves as base class for DocValues based on StringIndex **/ public abstract class StringIndexDocValues extends DocValues { - protected final FieldCache.StringIndex index; - protected final int[] order; - protected final String[] lookup; + protected final FieldCache.DocTermsIndex termsIndex; protected final ValueSource vs; public StringIndexDocValues(ValueSource vs, IndexReader reader, String field) throws IOException { try { - index = FieldCache.DEFAULT.getStringIndex(reader, field); + termsIndex = FieldCache.DEFAULT.getTermsIndex(reader, field); } catch (RuntimeException e) { throw new StringIndexException(field, e); } - order = index.order; - lookup = index.lookup; this.vs = vs; } protected abstract String toTerm(String readableValue); - @Override + @Override public ValueSourceScorer getRangeScorer(IndexReader reader, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper) { // TODO: are lowerVal and upperVal in indexed form or not? lowerVal = lowerVal == null ? null : toTerm(lowerVal); upperVal = upperVal == null ? null : toTerm(upperVal); + final BytesRef spare = new BytesRef(); + int lower = Integer.MIN_VALUE; if (lowerVal != null) { - lower = index.binarySearchLookup(lowerVal); + lower = termsIndex.binarySearchLookup(new BytesRef(lowerVal), spare); if (lower < 0) { lower = -lower-1; } else if (!includeLower) { @@ -62,7 +61,7 @@ int upper = Integer.MAX_VALUE; if (upperVal != null) { - upper = index.binarySearchLookup(upperVal); + upper = termsIndex.binarySearchLookup(new BytesRef(upperVal), spare); if (upper < 0) { upper = -upper-2; } else if (!includeUpper) { @@ -76,7 +75,7 @@ return new ValueSourceScorer(reader, this) { @Override public boolean matchesValue(int doc) { - int ord = order[doc]; + int ord = termsIndex.getOrd(doc); return ord >= ll && ord <= uu; } }; Index: solr/src/java/org/apache/solr/search/function/ReverseOrdFieldSource.java =================================================================== --- solr/src/java/org/apache/solr/search/function/ReverseOrdFieldSource.java (revision 947841) +++ solr/src/java/org/apache/solr/search/function/ReverseOrdFieldSource.java (working copy) @@ -26,7 +26,7 @@ import java.util.Map; /** - * Obtains the ordinal of the field value from the default Lucene {@link org.apache.lucene.search.FieldCache} using getStringIndex() + * Obtains the ordinal of the field value from the default Lucene {@link org.apache.lucene.search.FieldCache} using getTermsIndex() * and reverses the order. *
* The native lucene index order is used to assign an ordinal value for each field value. @@ -58,31 +58,30 @@ } public DocValues getValues(Map context, IndexReader reader) throws IOException { - final FieldCache.StringIndex sindex = FieldCache.DEFAULT.getStringIndex(reader, field); + final FieldCache.DocTermsIndex sindex = FieldCache.DEFAULT.getTermsIndex(reader, field); - final int arr[] = sindex.order; - final int end = sindex.lookup.length; + final int end = sindex.numOrd(); return new DocValues() { public float floatVal(int doc) { - return (float)(end - arr[doc]); + return (float)(end - sindex.getOrd(doc)); } public int intVal(int doc) { - return (int)(end - arr[doc]); + return (int)(end - sindex.getOrd(doc)); } public long longVal(int doc) { - return (long)(end - arr[doc]); + return (long)(end - sindex.getOrd(doc)); } public double doubleVal(int doc) { - return (double)(end - arr[doc]); + return (double)(end - sindex.getOrd(doc)); } public String strVal(int doc) { // the string value of the ordinal, not the string itself - return Integer.toString((end - arr[doc])); + return Integer.toString((end - sindex.getOrd(doc))); } public String toString(int doc) { Index: solr/src/java/org/apache/solr/search/function/OrdFieldSource.java =================================================================== --- solr/src/java/org/apache/solr/search/function/OrdFieldSource.java (revision 947841) +++ solr/src/java/org/apache/solr/search/function/OrdFieldSource.java (working copy) @@ -61,24 +61,24 @@ } public float floatVal(int doc) { - return (float)order[doc]; + return (float)termsIndex.getOrd(doc); } public int intVal(int doc) { - return order[doc]; + return termsIndex.getOrd(doc); } public long longVal(int doc) { - return (long)order[doc]; + return (long)termsIndex.getOrd(doc); } public double doubleVal(int doc) { - return (double)order[doc]; + return (double)termsIndex.getOrd(doc); } public String strVal(int doc) { // the string value of the ordinal, not the string itself - return Integer.toString(order[doc]); + return Integer.toString(termsIndex.getOrd(doc)); } public String toString(int doc) { Index: solr/src/java/org/apache/solr/search/MissingStringLastComparatorSource.java =================================================================== --- solr/src/java/org/apache/solr/search/MissingStringLastComparatorSource.java (revision 947841) +++ solr/src/java/org/apache/solr/search/MissingStringLastComparatorSource.java (working copy) @@ -19,6 +19,7 @@ import org.apache.lucene.search.*; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.util.BytesRef; import java.io.IOException; @@ -53,33 +54,34 @@ // be extended or have it's values accessed. 
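The hunks above all follow the same migration pattern: the old parallel arrays (order[doc] for the ord, lookup[ord] for the value) become calls on a single FieldCache.DocTermsIndex, and term bytes are materialized only when actually needed. A minimal sketch of the consuming side, not part of the patch, assuming only the DocTermsIndex methods this patch introduces (getTermsIndex, getOrd, get, size):

import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.util.BytesRef;

// Hypothetical illustration of the ord-based access pattern; not part of the patch.
class DocTermsIndexExample {
  static void dumpFieldValues(IndexReader reader, String field) throws IOException {
    final FieldCache.DocTermsIndex termsIndex = FieldCache.DEFAULT.getTermsIndex(reader, field);
    final BytesRef spare = new BytesRef(); // reused across docs to avoid per-doc allocation
    for (int doc = 0; doc < termsIndex.size(); doc++) {
      final int ord = termsIndex.getOrd(doc); // ord 0 is reserved for "no value for this doc"
      if (ord != 0) {
        // get() fills the passed-in BytesRef and returns it
        System.out.println(doc + " -> " + termsIndex.get(ord, spare).utf8ToString());
      }
    }
  }
}

The comparator class below applies the same ord-based pattern to sorting with missing-last semantics.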
class MissingLastOrdComparator extends FieldComparator { private static final int NULL_ORD = Integer.MAX_VALUE; - private final String nullVal; + private final BytesRef nullVal; private final int[] ords; - private final String[] values; + private final BytesRef[] values; private final int[] readerGen; private int currentReaderGen = -1; - private String[] lookup; - private int[] order; + private FieldCache.DocTermsIndex termsIndex; private final String field; private int bottomSlot = -1; private int bottomOrd; - private String bottomValue; + private BytesRef bottomValue; private final boolean reversed; private final int sortPos; + private final BytesRef tempBR = new BytesRef(); public MissingLastOrdComparator(int numHits, String field, int sortPos, boolean reversed, String nullVal) { ords = new int[numHits]; - values = new String[numHits]; + values = new BytesRef[numHits]; readerGen = new int[numHits]; this.sortPos = sortPos; this.reversed = reversed; this.field = field; - this.nullVal = nullVal; + this.nullVal = nullVal == null ? null : new BytesRef(nullVal); } + @Override public int compare(int slot1, int slot2) { if (readerGen[slot1] == readerGen[slot2]) { int cmp = ords[slot1] - ords[slot2]; @@ -88,8 +90,8 @@ } } - final String val1 = values[slot1]; - final String val2 = values[slot2]; + final BytesRef val1 = values[slot1]; + final BytesRef val2 = values[slot2]; if (val1 == null) { if (val2 == null) { @@ -104,14 +106,14 @@ public int compareBottom(int doc) { assert bottomSlot != -1; - int order = this.order[doc]; + int order = termsIndex.getOrd(doc); int ord = (order == 0) ? NULL_ORD : order; final int cmp = bottomOrd - ord; if (cmp != 0) { return cmp; } - final String val2 = lookup[order]; + final BytesRef val2 = termsIndex.get(order, tempBR); // take care of the case where both vals are null if (bottomValue == val2) return 0; @@ -122,25 +124,25 @@ private void convert(int slot) { readerGen[slot] = currentReaderGen; int index = 0; - String value = values[slot]; + BytesRef value = values[slot]; if (value == null) { // should already be done - // ords[slot] = NULL_ORD; + assert ords[slot] == NULL_ORD; return; } if (sortPos == 0 && bottomSlot != -1 && bottomSlot != slot) { // Since we are the primary sort, the entries in the // queue are bounded by bottomOrd: - assert bottomOrd < lookup.length; + assert bottomOrd < termsIndex.numOrd(); if (reversed) { - index = binarySearch(lookup, value, bottomOrd, lookup.length-1); + index = binarySearch(tempBR, termsIndex, value, bottomOrd, termsIndex.numOrd()-1); } else { - index = binarySearch(lookup, value, 0, bottomOrd); + index = binarySearch(tempBR, termsIndex, value, 0, bottomOrd); } } else { // Full binary search - index = binarySearch(lookup, value); + index = binarySearch(tempBR, termsIndex, value); } if (index < 0) { @@ -149,26 +151,35 @@ ords[slot] = index; } + @Override public void copy(int slot, int doc) { - final int ord = order[doc]; - ords[slot] = ord == 0 ? 
NULL_ORD : ord; + final int ord = termsIndex.getOrd(doc); assert ord >= 0; - values[slot] = lookup[ord]; + if (ord == 0) { + ords[slot] = NULL_ORD; + values[slot] = nullVal; + } else { + ords[slot] = ord; + if (values[slot] == null) { + values[slot] = new BytesRef(); + } + termsIndex.get(ord, values[slot]); + } readerGen[slot] = currentReaderGen; } + @Override public void setNextReader(IndexReader reader, int docBase) throws IOException { - FieldCache.StringIndex currentReaderValues = FieldCache.DEFAULT.getStringIndex(reader, field); + termsIndex = FieldCache.DEFAULT.getTermsIndex(reader, field); currentReaderGen++; - order = currentReaderValues.order; - lookup = currentReaderValues.lookup; - assert lookup.length > 0; + assert termsIndex.numOrd() > 0; if (bottomSlot != -1) { convert(bottomSlot); bottomOrd = ords[bottomSlot]; } } + @Override public void setBottom(final int bottom) { bottomSlot = bottom; if (readerGen[bottom] != currentReaderGen) { @@ -180,12 +191,13 @@ bottomValue = values[bottom]; } + @Override public Comparable value(int slot) { Comparable v = values[slot]; return v==null ? nullVal : v; } - public String[] getValues() { + public BytesRef[] getValues() { return values; } Index: solr/src/java/org/apache/solr/request/PerSegmentSingleValuedFaceting.java =================================================================== --- solr/src/java/org/apache/solr/request/PerSegmentSingleValuedFaceting.java (revision 947841) +++ solr/src/java/org/apache/solr/request/PerSegmentSingleValuedFaceting.java (working copy) @@ -5,6 +5,7 @@ import org.apache.lucene.search.FieldCache; import org.apache.lucene.search.Filter; import org.apache.lucene.util.PriorityQueue; +import org.apache.lucene.util.BytesRef; import org.apache.solr.common.SolrException; import org.apache.solr.common.params.FacetParams; import org.apache.solr.common.util.NamedList; @@ -100,7 +101,7 @@ } @Override protected boolean lessThan(SegFacet a, SegFacet b) { - return a.terms[a.pos].compareTo(b.terms[b.pos]) < 0; + return a.si.get(a.pos, a.tempBR).compareTo(b.si.get(b.pos, b.tempBR)) < 0; } }; @@ -143,6 +144,7 @@ } } + // nocommit - should we push BytesRef collection into here?: FacetCollector collector; if (sort.equals(FacetParams.FACET_SORT_COUNT) || sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY)) { collector = new CountSortedFacetCollector(offset, limit, mincount); @@ -150,9 +152,11 @@ collector = new IndexSortedFacetCollector(offset, limit, mincount); } + final BytesRef tempBR = new BytesRef(); + while (queue.size() > 0) { SegFacet seg = queue.top(); - String val = seg.terms[seg.pos]; + BytesRef val = seg.si.get(seg.pos, tempBR); int count = 0; do { @@ -167,9 +171,9 @@ } else { seg = queue.updateTop(); } - } while (seg != null && val.compareTo(seg.terms[seg.pos]) == 0); + } while (seg != null && val.compareTo(seg.si.get(seg.pos, seg.tempBR)) == 0); - boolean stop = collector.collect(val, count); + boolean stop = collector.collect(val.utf8ToString(), count); if (stop) break; } @@ -192,20 +196,6 @@ return res; } - - - - - // first element of the fieldcache is null, so we need this comparator. - private static final Comparator nullStrComparator = new Comparator() { - public int compare(Object o1, Object o2) { - if (o1==null) return (o2==null) ? 
0 : -1; - else if (o2==null) return 1; - return ((String)o1).compareTo((String)o2); - } - }; - - class SegFacet { SolrIndexReader reader; int readerOffset; @@ -215,32 +205,31 @@ this.readerOffset = readerOffset; } - int[] ords; - String[] terms; - + FieldCache.DocTermsIndex si; int startTermIndex; int endTermIndex; int[] counts; int pos; // only used during merge with other segments + final BytesRef tempBR = new BytesRef(); + void countTerms() throws IOException { - FieldCache.StringIndex si = FieldCache.DEFAULT.getStringIndex(reader, fieldName); - final String[] terms = this.terms = si.lookup; - final int[] termNum = this.ords = si.order; + si = FieldCache.DEFAULT.getTermsIndex(reader, fieldName); // SolrCore.log.info("reader= " + reader + " FC=" + System.identityHashCode(si)); if (prefix!=null) { - startTermIndex = Arrays.binarySearch(terms,prefix,nullStrComparator); + startTermIndex = si.binarySearchLookup(new BytesRef(prefix), tempBR); if (startTermIndex<0) startTermIndex=-startTermIndex-1; // find the end term. \uffff isn't a legal unicode char, but only compareTo // is used, so it should be fine, and is guaranteed to be bigger than legal chars. // TODO: switch to binarySearch version that takes start/end in Java6 - endTermIndex = Arrays.binarySearch(terms,prefix+"\uffff\uffff\uffff\uffff",nullStrComparator); + endTermIndex = si.binarySearchLookup(new BytesRef(prefix+"\uffff\uffff\uffff\uffff"), tempBR); + assert endTermIndex < 0; endTermIndex = -endTermIndex-1; } else { startTermIndex=0; - endTermIndex=terms.length; + endTermIndex=si.numOrd(); } final int nTerms=endTermIndex-startTermIndex; @@ -251,17 +240,17 @@ DocIdSet idSet = baseSet.getDocIdSet(reader); DocIdSetIterator iter = idSet.iterator(); - if (startTermIndex==0 && endTermIndex==terms.length) { + if (startTermIndex==0 && endTermIndex==si.numOrd()) { // specialized version when collecting counts for all terms int doc; while ((doc = iter.nextDoc()) < DocIdSetIterator.NO_MORE_DOCS) { - counts[termNum[doc]]++; + counts[si.getOrd(doc)]++; } } else { // version that adjusts term numbers because we aren't collecting the full range int doc; while ((doc = iter.nextDoc()) < DocIdSetIterator.NO_MORE_DOCS) { - int term = termNum[doc]; + int term = si.getOrd(doc); int arrIdx = term-startTermIndex; if (arrIdx>=0 && arrIdxdocs. * The field must have at most one indexed token per document. @@ -384,23 +375,32 @@ FieldType ft = searcher.getSchema().getFieldType(fieldName); NamedList res = new NamedList(); - FieldCache.StringIndex si = FieldCache.DEFAULT.getStringIndex(searcher.getReader(), fieldName); - final String[] terms = si.lookup; - final int[] termNum = si.order; + FieldCache.DocTermsIndex si = FieldCache.DEFAULT.getTermsIndex(searcher.getReader(), fieldName); - if (prefix!=null && prefix.length()==0) prefix=null; + final BytesRef prefixRef; + if (prefix == null) { + prefixRef = null; + } else if (prefix.length()==0) { + prefix = null; + prefixRef = null; + } else { + prefixRef = new BytesRef(prefix); + } + final BytesRef br = new BytesRef(); + int startTermIndex, endTermIndex; if (prefix!=null) { - startTermIndex = Arrays.binarySearch(terms,prefix,nullStrComparator); + startTermIndex = si.binarySearchLookup(prefixRef, br); if (startTermIndex<0) startTermIndex=-startTermIndex-1; // find the end term. \uffff isn't a legal unicode char, but only compareTo // is used, so it should be fine, and is guaranteed to be bigger than legal chars. 
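The prefix bounding performed here (and in the per-segment countTerms above) rests on two binary searches over the sorted terms: one for the prefix itself, done just above, and one just below for the prefix followed by \uffff characters, which the comment argues compares above any legal term. Restated in isolation as a hypothetical helper, using only binarySearchLookup as this patch defines it on FieldCache.DocTermsIndex:

import org.apache.lucene.search.FieldCache;
import org.apache.lucene.util.BytesRef;

// Hypothetical restatement of the prefix-bounding idiom used in these hunks.
class PrefixBounds {
  /** Returns {startTermIndex, endTermIndex} bounding all ords whose term starts with prefix. */
  static int[] bounds(FieldCache.DocTermsIndex si, String prefix) {
    final BytesRef spare = new BytesRef();
    int start = si.binarySearchLookup(new BytesRef(prefix), spare);
    if (start < 0) start = -start - 1; // negative means "not found": decode the insertion point
    // the sentinel can never be an indexed term, so this search must report "not found"
    int end = si.binarySearchLookup(new BytesRef(prefix + "\uffff\uffff\uffff\uffff"), spare);
    assert end < 0;
    end = -end - 1;
    return new int[] {start, end};
  }
}

The search that follows decodes its insertion point the same way.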
- endTermIndex = Arrays.binarySearch(terms,prefix+"\uffff\uffff\uffff\uffff",nullStrComparator); + endTermIndex = si.binarySearchLookup(new BytesRef(prefix+"\uffff\uffff\uffff\uffff"), br); + assert endTermIndex < 0; endTermIndex = -endTermIndex-1; } else { startTermIndex=1; - endTermIndex=terms.length; + endTermIndex=si.numOrd(); } final int nTerms=endTermIndex-startTermIndex; @@ -413,7 +413,7 @@ DocIterator iter = docs.iterator(); while (iter.hasNext()) { - int term = termNum[iter.nextDoc()]; + int term = si.getOrd(iter.nextDoc()); int arrIdx = term-startTermIndex; if (arrIdx>=0 && arrIdx0 ? offset+limit : Integer.MAX_VALUE-1; maxsize = Math.min(maxsize, nTerms); - final BoundedTreeSet> queue = new BoundedTreeSet>(maxsize); + final BoundedTreeSet> queue = new BoundedTreeSet>(maxsize); int min=mincount-1; // the smallest value in the top 'N' values for (int i=0; imin rather than c>=min as an optimization because we are going in // index order, so we already know that the keys are ordered. This can be very // important if a lot of the counts are repeated (like zero counts would be). - queue.add(new CountPair(terms[startTermIndex+i], c)); + queue.add(new CountPair(si.get(startTermIndex+i, new BytesRef()), c)); if (queue.size()>=maxsize) min=queue.last().val; } } // now select the right page from the results - for (CountPair p : queue) { + for (CountPair p : queue) { if (--off>=0) continue; if (--lim<0) break; - res.add(ft.indexedToReadable(p.key), p.val); + res.add(ft.indexedToReadable(p.key.utf8ToString()), p.val); } } else { // add results in index order @@ -459,7 +459,7 @@ int c = counts[i]; if (c=0) continue; if (--lim<0) break; - res.add(ft.indexedToReadable(terms[startTermIndex+i]), c); + res.add(ft.indexedToReadable(si.get(startTermIndex+i, br).utf8ToString()), c); } } } @@ -565,6 +565,7 @@ // use the filter cache // TODO: not a big deal, but there are prob more efficient ways to go from utf8 to string // TODO: need a term query that takes a BytesRef + // nocommit -- why the extra new String(...)? 
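On the nocommit just above: new String(String) only makes a defensive copy, and BytesRef.utf8ToString() already returns a freshly allocated String, so the extra copy below does look redundant. More broadly, the counting loops in this hunk reduce per-document work to a single getOrd call plus an array increment; a compact, hypothetical restatement of that core:

import java.io.IOException;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.FieldCache;

// Hypothetical restatement of the ord-counting core used by getFieldCacheCounts.
class OrdCounting {
  static int[] countOrds(FieldCache.DocTermsIndex si, DocIdSetIterator iter,
                         int startTermIndex, int endTermIndex) throws IOException {
    final int[] counts = new int[endTermIndex - startTermIndex];
    int doc;
    while ((doc = iter.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
      final int arrIdx = si.getOrd(doc) - startTermIndex; // shift the ord into the collected range
      if (arrIdx >= 0 && arrIdx < counts.length) {
        counts[arrIdx]++; // ords outside the prefix-bounded range are simply ignored
      }
    }
    return counts;
  }
}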
Term t = template.createTerm(new String(term.utf8ToString())); c = searcher.numDocs(new TermQuery(t), docs); } else { Index: solr/src/java/org/apache/solr/request/UnInvertedField.java =================================================================== --- solr/src/java/org/apache/solr/request/UnInvertedField.java (revision 947841) +++ solr/src/java/org/apache/solr/request/UnInvertedField.java (working copy) @@ -653,11 +653,11 @@ int i = 0; final FieldFacetStats[] finfo = new FieldFacetStats[facet.length]; //Initialize facetstats, if facets have been passed in - FieldCache.StringIndex si; + FieldCache.DocTermsIndex si; for (String f : facet) { FieldType facet_ft = searcher.getSchema().getFieldType(f); try { - si = FieldCache.DEFAULT.getStringIndex(searcher.getReader(), f); + si = FieldCache.DEFAULT.getTermsIndex(searcher.getReader(), f); } catch (IOException e) { throw new RuntimeException("failed to open field cache for: " + f, e); Index: solr/src/java/org/apache/solr/handler/component/QueryElevationComponent.java =================================================================== --- solr/src/java/org/apache/solr/handler/component/QueryElevationComponent.java (revision 947841) +++ solr/src/java/org/apache/solr/handler/component/QueryElevationComponent.java (working copy) @@ -44,6 +44,7 @@ import org.apache.lucene.index.Term; import org.apache.lucene.search.*; import org.apache.lucene.util.StringHelper; +import org.apache.lucene.util.BytesRef; import org.apache.solr.common.SolrException; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.DOMUtil; @@ -99,7 +100,7 @@ final String analyzed; final BooleanClause[] exclude; final BooleanQuery include; - final Map priority; + final Map priority; // use singletons so hashCode/equals on Sort will just work final FieldComparatorSource comparatorSource; @@ -111,12 +112,12 @@ this.include = new BooleanQuery(); this.include.setBoost( 0 ); - this.priority = new HashMap(); + this.priority = new HashMap(); int max = elevate.size()+5; for( String id : elevate ) { TermQuery tq = new TermQuery( new Term( idField, id ) ); include.add( tq, BooleanClause.Occur.SHOULD ); - this.priority.put( id, max-- ); + this.priority.put( new BytesRef(id), max-- ); } if( exclude == null || exclude.isEmpty() ) { @@ -445,18 +446,19 @@ } class ElevationComparatorSource extends FieldComparatorSource { - private final Map priority; + private final Map priority; - public ElevationComparatorSource( final Map boosts) { + public ElevationComparatorSource( final Map boosts) { this.priority = boosts; } public FieldComparator newComparator(final String fieldname, final int numHits, int sortPos, boolean reversed) throws IOException { return new FieldComparator() { - FieldCache.StringIndex idIndex; + FieldCache.DocTermsIndex idIndex; private final int[] values = new int[numHits]; int bottomVal; + private final BytesRef tempBR = new BytesRef(); public int compare(int slot1, int slot2) { return values[slot2] - values[slot1]; // values will be small enough that there is no overflow concern @@ -467,7 +469,7 @@ } private int docVal(int doc) throws IOException { - String id = idIndex.lookup[idIndex.order[doc]]; + BytesRef id = idIndex.get(idIndex.getOrd(doc), tempBR); Integer prio = priority.get(id); return prio == null ? 
0 : prio.intValue(); } @@ -481,7 +483,7 @@ } public void setNextReader(IndexReader reader, int docBase) throws IOException { - idIndex = FieldCache.DEFAULT.getStringIndex(reader, fieldname); + idIndex = FieldCache.DEFAULT.getTermsIndex(reader, fieldname); } public Comparable value(int slot) { Index: solr/src/java/org/apache/solr/handler/component/FieldFacetStats.java =================================================================== --- solr/src/java/org/apache/solr/handler/component/FieldFacetStats.java (revision 947841) +++ solr/src/java/org/apache/solr/handler/component/FieldFacetStats.java (working copy) @@ -17,6 +17,7 @@ */ import org.apache.lucene.search.FieldCache; +import org.apache.lucene.util.BytesRef; import org.apache.solr.schema.FieldType; import java.util.ArrayList; @@ -37,12 +38,9 @@ public class FieldFacetStats { public final String name; - final FieldCache.StringIndex si; + final FieldCache.DocTermsIndex si; final FieldType ft; - final String[] terms; - final int[] termNum; - final int startTermIndex; final int endTermIndex; final int nTerms; @@ -53,16 +51,14 @@ final List> facetStatsTerms; - public FieldFacetStats(String name, FieldCache.StringIndex si, FieldType ft, int numStatsTerms) { + public FieldFacetStats(String name, FieldCache.DocTermsIndex si, FieldType ft, int numStatsTerms) { this.name = name; this.si = si; this.ft = ft; this.numStatsTerms = numStatsTerms; - terms = si.lookup; - termNum = si.order; startTermIndex = 1; - endTermIndex = terms.length; + endTermIndex = si.numOrd(); nTerms = endTermIndex - startTermIndex; facetStatsValues = new HashMap(); @@ -76,21 +72,28 @@ } } + // nocommit -- BytesRef? + // nocommit -- is this called? String getTermText(int docID) { - return terms[termNum[docID]]; + // nocommit -- can we reuse the BytesRef? + final BytesRef br = si.get(si.getOrd(docID), new BytesRef()); + String s = br == null ? null : br.utf8ToString(); + return s; } - public boolean facet(int docID, Double v) { - int term = termNum[docID]; + int term = si.getOrd(docID); int arrIdx = term - startTermIndex; if (arrIdx >= 0 && arrIdx < nTerms) { - String key = ft.indexedToReadable(terms[term]); + // nocommit reuse BytesRef? + final BytesRef br = si.get(term, new BytesRef()); + String key = ft.indexedToReadable(br == null ? null : br.utf8ToString()); StatsValues stats = facetStatsValues.get(key); if (stats == null) { stats = new StatsValues(); facetStatsValues.put(key, stats); } + if (v != null) { stats.accumulate(v); } else { @@ -107,10 +110,12 @@ // Currently only used by UnInvertedField stats public boolean facetTermNum(int docID, int statsTermNum) { - int term = termNum[docID]; + int term = si.getOrd(docID); int arrIdx = term - startTermIndex; if (arrIdx >= 0 && arrIdx < nTerms) { - String key = ft.indexedToReadable(terms[term]); + // nocommit reuse BytesRef? + final BytesRef br = si.get(term, new BytesRef()); + String key = br == null ? 
null : br.utf8ToString(); HashMap statsTermCounts = facetStatsTerms.get(statsTermNum); Integer statsTermCount = statsTermCounts.get(key); if (statsTermCount == null) { Index: solr/src/java/org/apache/solr/handler/component/StatsComponent.java =================================================================== --- solr/src/java/org/apache/solr/handler/component/StatsComponent.java (revision 947841) +++ solr/src/java/org/apache/solr/handler/component/StatsComponent.java (working copy) @@ -249,9 +249,9 @@ public NamedList getFieldCacheStats(String fieldName, String[] facet ) { FieldType ft = searcher.getSchema().getFieldType(fieldName); - FieldCache.StringIndex si = null; + FieldCache.DocTermsIndex si = null; try { - si = FieldCache.DEFAULT.getStringIndex(searcher.getReader(), fieldName); + si = FieldCache.DEFAULT.getTermsIndex(searcher.getReader(), fieldName); } catch (IOException e) { throw new RuntimeException( "failed to open field cache for: "+fieldName, e ); @@ -266,15 +266,15 @@ for( String f : facet ) { ft = searcher.getSchema().getFieldType(f); try { - si = FieldCache.DEFAULT.getStringIndex(searcher.getReader(), f); + si = FieldCache.DEFAULT.getTermsIndex(searcher.getReader(), f); } catch (IOException e) { throw new RuntimeException( "failed to open field cache for: "+f, e ); } finfo[i++] = new FieldFacetStats( f, si, ft, 0 ); } + - DocIterator iter = docs.iterator(); while (iter.hasNext()) { int docID = iter.nextDoc(); Index: lucene/src/test/org/apache/lucene/search/function/TestOrdValues.java =================================================================== --- lucene/src/test/org/apache/lucene/search/function/TestOrdValues.java (revision 947841) +++ lucene/src/test/org/apache/lucene/search/function/TestOrdValues.java (working copy) @@ -207,7 +207,7 @@ log("compare (should differ): " + innerArray + " to " + q.valSrc.getValues(reader).getInnerArray()); assertNotSame( - "different values shuold be loaded for a different field!", + "different values should be loaded for a different field!", innerArray, q.valSrc.getValues(reader).getInnerArray()); } catch (UnsupportedOperationException e) { if (!warned) { Index: lucene/src/test/org/apache/lucene/search/TestFieldCache.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestFieldCache.java (revision 947841) +++ lucene/src/test/org/apache/lucene/search/TestFieldCache.java (working copy) @@ -24,13 +24,17 @@ import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; +import org.apache.lucene.util.BytesRef; import java.io.IOException; +import java.util.Random; import java.io.ByteArrayOutputStream; import java.io.PrintStream; public class TestFieldCache extends LuceneTestCase { protected IndexReader reader; - private static final int NUM_DOCS = 1000; + private int NUM_DOCS; + private String[] unicodeStrings; public TestFieldCache(String s) { super(s); @@ -39,14 +43,17 @@ @Override protected void setUp() throws Exception { super.setUp(); + Random r = newRandom(); + NUM_DOCS = 1000 * _TestUtil.getRandomMultiplier(); RAMDirectory directory = new RAMDirectory(); - IndexWriter writer= new IndexWriter(directory, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())); + IndexWriter writer= new IndexWriter(directory, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMaxBufferedDocs(500)); long theLong = Long.MAX_VALUE; double theDouble 
= Double.MAX_VALUE; byte theByte = Byte.MAX_VALUE; short theShort = Short.MAX_VALUE; int theInt = Integer.MAX_VALUE; float theFloat = Float.MAX_VALUE; + unicodeStrings = new String[NUM_DOCS]; for (int i = 0; i < NUM_DOCS; i++){ Document doc = new Document(); doc.add(new Field("theLong", String.valueOf(theLong--), Field.Store.NO, Field.Index.NOT_ANALYZED)); @@ -55,10 +62,28 @@ doc.add(new Field("theShort", String.valueOf(theShort--), Field.Store.NO, Field.Index.NOT_ANALYZED)); doc.add(new Field("theInt", String.valueOf(theInt--), Field.Store.NO, Field.Index.NOT_ANALYZED)); doc.add(new Field("theFloat", String.valueOf(theFloat--), Field.Store.NO, Field.Index.NOT_ANALYZED)); + + // sometimes skip the field: + if (r.nextInt(40) != 17) { + String s = null; + if (i > 0 && r.nextInt(3) == 1) { + // reuse past string -- try to find one that's not null + for(int iter=0;iter<10 && s==null;iter++) { + s = unicodeStrings[r.nextInt(i)]; + } + if (s == null) { + s = _TestUtil.randomUnicodeString(r, 250); + } + } else { + s = _TestUtil.randomUnicodeString(r, 250); + } + unicodeStrings[i] = s; + doc.add(new Field("theRandomUnicodeString", unicodeStrings[i], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); + } writer.addDocument(doc); } + reader = writer.getReader(); writer.close(); - reader = IndexReader.open(directory, true); } public void testInfoStream() throws Exception { @@ -129,5 +154,27 @@ assertTrue(floats[i] + " does not equal: " + (Float.MAX_VALUE - i), floats[i] == (Float.MAX_VALUE - i)); } + + // getTermsIndex + FieldCache.DocTermsIndex termsIndex = cache.getTermsIndex(reader, "theRandomUnicodeString"); + assertSame("Second request to cache return same array", termsIndex, cache.getTermsIndex(reader, "theRandomUnicodeString")); + assertTrue("doubles Size: " + termsIndex.size() + " is not: " + NUM_DOCS, termsIndex.size() == NUM_DOCS); + final BytesRef br = new BytesRef(); + for (int i = 0; i < NUM_DOCS; i++) { + final BytesRef term = termsIndex.get(termsIndex.getOrd(i), br); + final String s = term == null ? null : term.utf8ToString(); + assertTrue("for doc " + i + ": " + s + " does not equal: " + unicodeStrings[i], unicodeStrings[i] == null || unicodeStrings[i].equals(s)); + } + FieldCache.DEFAULT.purge(reader); + + // getTerms + FieldCache.DocTerms terms = cache.getTerms(reader, "theRandomUnicodeString"); + assertSame("Second request to cache return same array", terms, cache.getTerms(reader, "theRandomUnicodeString")); + assertTrue("doubles Size: " + terms.size() + " is not: " + NUM_DOCS, terms.size() == NUM_DOCS); + for (int i = 0; i < NUM_DOCS; i++) { + final BytesRef term = terms.get(i, br); + final String s = term == null ? 
null : term.utf8ToString(); + assertTrue("for doc " + i + ": " + s + " does not equal: " + unicodeStrings[i], unicodeStrings[i] == null || unicodeStrings[i].equals(s)); + } } } \ No newline at end of file Index: lucene/src/test/org/apache/lucene/search/TestElevationComparator.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestElevationComparator.java (revision 947841) +++ lucene/src/test/org/apache/lucene/search/TestElevationComparator.java (working copy) @@ -24,13 +24,14 @@ import org.apache.lucene.search.FieldValueHitQueue.Entry; import org.apache.lucene.store.*; import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.BytesRef; import java.io.IOException; import java.util.HashMap; import java.util.Map; public class TestElevationComparator extends LuceneTestCase { - private final Map priority = new HashMap(); + private final Map priority = new HashMap(); //@Test public void testSorting() throws Throwable { @@ -109,7 +110,7 @@ int max = (vals.length / 2) + 5; for (int i = 0; i < vals.length - 1; i += 2) { q.add(new TermQuery(new Term(vals[i], vals[i + 1])), BooleanClause.Occur.SHOULD); - priority.put(vals[i + 1], Integer.valueOf(max--)); + priority.put(new BytesRef(vals[i + 1]), Integer.valueOf(max--)); // System.out.println(" pri doc=" + vals[i+1] + " pri=" + (1+max)); } return q; @@ -125,9 +126,9 @@ } class ElevationComparatorSource extends FieldComparatorSource { - private final Map priority; + private final Map priority; - public ElevationComparatorSource(final Map boosts) { + public ElevationComparatorSource(final Map boosts) { this.priority = boosts; } @@ -135,8 +136,9 @@ public FieldComparator newComparator(final String fieldname, final int numHits, int sortPos, boolean reversed) throws IOException { return new FieldComparator() { - FieldCache.StringIndex idIndex; + FieldCache.DocTermsIndex idIndex; private final int[] values = new int[numHits]; + private final BytesRef tempBR = new BytesRef(); int bottomVal; @Override @@ -150,7 +152,7 @@ } private int docVal(int doc) throws IOException { - String id = idIndex.lookup[idIndex.order[doc]]; + BytesRef id = idIndex.get(idIndex.getOrd(doc), tempBR); Integer prio = priority.get(id); return prio == null ? 0 : prio.intValue(); } @@ -167,7 +169,7 @@ @Override public void setNextReader(IndexReader reader, int docBase) throws IOException { - idIndex = FieldCache.DEFAULT.getStringIndex(reader, fieldname); + idIndex = FieldCache.DEFAULT.getTermsIndex(reader, fieldname); } @Override Index: lucene/src/test/org/apache/lucene/util/_TestUtil.java =================================================================== --- lucene/src/test/org/apache/lucene/util/_TestUtil.java (revision 947841) +++ lucene/src/test/org/apache/lucene/util/_TestUtil.java (working copy) @@ -118,7 +118,11 @@ /** Returns random string, including full unicode range. 
*/ public static String randomUnicodeString(Random r) { - final int end = r.nextInt(20); + return randomUnicodeString(r, 20); + } + + public static String randomUnicodeString(Random r, int maxLength) { + final int end = r.nextInt(maxLength); if (end == 0) { // allow 0 length return ""; @@ -126,6 +130,8 @@ final char[] buffer = new char[end]; for (int i = 0; i < end; i++) { int t = r.nextInt(5); + buffer[i] = (char) (97 + r.nextInt(26)); + /* if (0 == t && i < end - 1) { // Make a surrogate pair // High surrogate @@ -137,6 +143,7 @@ else if (2 == t) buffer[i] = (char) nextInt(r, 0x80, 0x800); else if (3 == t) buffer[i] = (char) nextInt(r, 0x800, 0xd7ff); else if (4 == t) buffer[i] = (char) nextInt(r, 0xe000, 0xffff); + */ } return new String(buffer, 0, end); } Index: lucene/src/test/org/apache/lucene/util/TestFieldCacheSanityChecker.java =================================================================== --- lucene/src/test/org/apache/lucene/util/TestFieldCacheSanityChecker.java (revision 947841) +++ lucene/src/test/org/apache/lucene/util/TestFieldCacheSanityChecker.java (working copy) @@ -112,7 +112,7 @@ cache.purgeAllCaches(); cache.getInts(readerX, "theInt", FieldCache.DEFAULT_INT_PARSER); - cache.getStrings(readerX, "theInt"); + cache.getTerms(readerX, "theInt"); cache.getBytes(readerX, "theByte"); // // // @@ -135,9 +135,9 @@ FieldCache cache = FieldCache.DEFAULT; cache.purgeAllCaches(); - cache.getStrings(readerA, "theString"); - cache.getStrings(readerB, "theString"); - cache.getStrings(readerX, "theString"); + cache.getTerms(readerA, "theString"); + cache.getTerms(readerB, "theString"); + cache.getTerms(readerX, "theString"); cache.getBytes(readerX, "theByte"); Index: lucene/src/java/org/apache/lucene/search/FieldDocSortedHitQueue.java =================================================================== --- lucene/src/java/org/apache/lucene/search/FieldDocSortedHitQueue.java (revision 947841) +++ lucene/src/java/org/apache/lucene/search/FieldDocSortedHitQueue.java (working copy) @@ -18,6 +18,7 @@ */ import org.apache.lucene.util.PriorityQueue; +import org.apache.lucene.util.BytesRef; import java.text.Collator; import java.util.Locale; @@ -99,8 +100,8 @@ for (int i=0; i>> 1; - int cmp = lookup[mid].compareTo(key); - - if (cmp < 0) - low = mid + 1; - else if (cmp > 0) - high = mid - 1; - else - return mid; // key found - } - return -(low + 1); // key not found. - } - - /** All the term values, in natural order. */ - public final String[] lookup; - - /** For each document, an index into the lookup array. */ - public final int[] order; - - /** Creates one of these objects */ - public StringIndex (int[] values, String[] lookup) { - this.order = values; - this.lookup = lookup; - } - } - /** * Marker interface as super-interface to all parsers. It * is used to specify a custom parser to {@link @@ -490,28 +452,80 @@ public double[] getDoubles(IndexReader reader, String field, DoubleParser parser) throws IOException; + /** Returned by {@link #getTerms} */ + public abstract static class DocTerms { + /** The BytesRef argument must not be null; the method + * returns the same BytesRef, or null if the doc did + * not have this field or was deleted. */ + public abstract BytesRef get(int docID, BytesRef ret); + + /** Number of documents */ + public abstract int size(); + } + /** Checks the internal cache for an appropriate entry, and if none - * is found, reads the term values in field and returns an array - * of size reader.maxDoc() containing the value each document - * has in the given field. 
+   * is found, reads the term values in field
+   * and returns a {@link DocTerms} instance, providing a
+   * method to retrieve the term (as a BytesRef) per document.
    * @param reader  Used to get field values.
    * @param field   Which field contains the strings.
    * @return The values in the given field for each document.
    * @throws IOException  If any error occurs.
    */
-  public String[] getStrings (IndexReader reader, String field)
+  public DocTerms getTerms (IndexReader reader, String field)
   throws IOException;

+  /** Returned by {@link #getTermsIndex} */
+  public abstract static class DocTermsIndex {
+
+    public int binarySearchLookup(BytesRef key, BytesRef spare) {
+      // this special case is the reason that Arrays.binarySearch() isn't useful.
+      if (key == null)
+        return 0;
+
+      int low = 1;
+      int high = numOrd()-1;
+
+      while (low <= high) {
+        int mid = (low + high) >>> 1;
+        int cmp = get(mid, spare).compareTo(key);
+
+        if (cmp < 0)
+          low = mid + 1;
+        else if (cmp > 0)
+          high = mid - 1;
+        else
+          return mid; // key found
+      }
+      return -(low + 1); // key not found.
+    }
+
+    /** The BytesRef argument must not be null; the method
+     *  returns the same BytesRef, or null if this ord is
+     *  the null ord (0). */
+    public abstract BytesRef get(int ord, BytesRef ret);
+
+    /** Returns sort ord for this document. */
+    public abstract int getOrd(int docID);
+
+    /** Returns total unique ord count; this includes +1 for
+     *  the null ord (always 0). */
+    public abstract int numOrd();
+
+    /** Number of documents */
+    public abstract int size();
+  }
+
   /** Checks the internal cache for an appropriate entry, and if none
-   * is found reads the term values in field and returns
-   * an array of them in natural order, along with an array telling
-   * which element in the term array each document uses.
+   * is found, reads the term values in field
+   * and returns a {@link DocTermsIndex} instance, providing
+   * methods to retrieve the ord and the term (as a BytesRef) per document.
    * @param reader  Used to get field values.
    * @param field   Which field contains the strings.
-   * @return Array of terms and index into the array for each document.
+   * @return The term ords and values in the given field for each document.
    * @throws IOException  If any error occurs.
*/ - public StringIndex getStringIndex (IndexReader reader, String field) + public DocTermsIndex getTermsIndex (IndexReader reader, String field) throws IOException; /** Index: lucene/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java =================================================================== --- lucene/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java (revision 947841) +++ lucene/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java (working copy) @@ -22,6 +22,7 @@ import org.apache.lucene.index.MultiFields; import org.apache.lucene.util.NumericUtils; import org.apache.lucene.util.Bits; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.document.NumericField; // for javadocs /** @@ -83,10 +84,11 @@ return new FieldCacheRangeFilter(field, null, lowerVal, upperVal, includeLower, includeUpper) { @Override public DocIdSet getDocIdSet(IndexReader reader) throws IOException { - final FieldCache.StringIndex fcsi = FieldCache.DEFAULT.getStringIndex(reader, field); - final int lowerPoint = fcsi.binarySearchLookup(lowerVal); - final int upperPoint = fcsi.binarySearchLookup(upperVal); - + final FieldCache.DocTermsIndex fcsi = FieldCache.DEFAULT.getTermsIndex(reader, field); + final BytesRef spare = new BytesRef(); + final int lowerPoint = fcsi.binarySearchLookup(lowerVal == null ? null : new BytesRef(lowerVal), spare); + final int upperPoint = fcsi.binarySearchLookup(upperVal == null ? null : new BytesRef(upperVal), spare); + final int inclusiveLowerPoint, inclusiveUpperPoint; // Hints: @@ -125,7 +127,8 @@ return new FieldCacheDocIdSet(reader, true) { @Override final boolean matchDoc(int doc) { - return fcsi.order[doc] >= inclusiveLowerPoint && fcsi.order[doc] <= inclusiveUpperPoint; + final int docOrd = fcsi.getOrd(doc); + return docOrd >= inclusiveLowerPoint && docOrd <= inclusiveUpperPoint; } }; } Index: lucene/src/java/org/apache/lucene/search/FieldCacheTermsFilter.java =================================================================== --- lucene/src/java/org/apache/lucene/search/FieldCacheTermsFilter.java (revision 947841) +++ lucene/src/java/org/apache/lucene/search/FieldCacheTermsFilter.java (working copy) @@ -21,6 +21,7 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.util.OpenBitSet; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.index.TermDocs; // for javadocs /** @@ -108,19 +109,20 @@ @Override public DocIdSet getDocIdSet(IndexReader reader) throws IOException { - return new FieldCacheTermsFilterDocIdSet(getFieldCache().getStringIndex(reader, field)); + return new FieldCacheTermsFilterDocIdSet(getFieldCache().getTermsIndex(reader, field)); } protected class FieldCacheTermsFilterDocIdSet extends DocIdSet { - private FieldCache.StringIndex fcsi; + private FieldCache.DocTermsIndex fcsi; private OpenBitSet openBitSet; - public FieldCacheTermsFilterDocIdSet(FieldCache.StringIndex fcsi) { + public FieldCacheTermsFilterDocIdSet(FieldCache.DocTermsIndex fcsi) { this.fcsi = fcsi; - openBitSet = new OpenBitSet(this.fcsi.lookup.length); + openBitSet = new OpenBitSet(this.fcsi.size()); + final BytesRef spare = new BytesRef(); for (int i=0;i 0) { openBitSet.fastSet(termNumber); } @@ -149,7 +151,7 @@ @Override public int nextDoc() { try { - while (!openBitSet.fastGet(fcsi.order[++doc])) {} + while (!openBitSet.fastGet(fcsi.getOrd(++doc))) {} } catch (ArrayIndexOutOfBoundsException e) { doc = NO_MORE_DOCS; } @@ -160,7 +162,7 @@ public int advance(int target) { try { doc = target; - while 
(!openBitSet.fastGet(fcsi.order[doc])) { + while (!openBitSet.fastGet(fcsi.getOrd(doc))) { doc++; } } catch (ArrayIndexOutOfBoundsException e) { Index: lucene/src/java/org/apache/lucene/search/function/ReverseOrdFieldSource.java =================================================================== --- lucene/src/java/org/apache/lucene/search/function/ReverseOrdFieldSource.java (revision 947841) +++ lucene/src/java/org/apache/lucene/search/function/ReverseOrdFieldSource.java (working copy) @@ -70,21 +70,20 @@ /*(non-Javadoc) @see org.apache.lucene.search.function.ValueSource#getValues(org.apache.lucene.index.IndexReader) */ @Override public DocValues getValues(IndexReader reader) throws IOException { - final FieldCache.StringIndex sindex = FieldCache.DEFAULT.getStringIndex(reader, field); + final FieldCache.DocTermsIndex termsIndex = FieldCache.DEFAULT.getTermsIndex(reader, field); - final int arr[] = sindex.order; - final int end = sindex.lookup.length; + final int end = termsIndex.numOrd(); return new DocValues() { /*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#floatVal(int) */ @Override public float floatVal(int doc) { - return (end - arr[doc]); + return (end - termsIndex.getOrd(doc)); } /* (non-Javadoc) @see org.apache.lucene.search.function.DocValues#intVal(int) */ @Override public int intVal(int doc) { - return end - arr[doc]; + return end - termsIndex.getOrd(doc); } /* (non-Javadoc) @see org.apache.lucene.search.function.DocValues#strVal(int) */ @Override @@ -100,7 +99,7 @@ /*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#getInnerArray() */ @Override Object getInnerArray() { - return arr; + return termsIndex; } }; } Index: lucene/src/java/org/apache/lucene/search/function/OrdFieldSource.java =================================================================== --- lucene/src/java/org/apache/lucene/search/function/OrdFieldSource.java (revision 947841) +++ lucene/src/java/org/apache/lucene/search/function/OrdFieldSource.java (working copy) @@ -19,6 +19,7 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.FieldCache; +import org.apache.lucene.search.FieldCache.DocTermsIndex; import java.io.IOException; @@ -69,18 +70,18 @@ /*(non-Javadoc) @see org.apache.lucene.search.function.ValueSource#getValues(org.apache.lucene.index.IndexReader) */ @Override public DocValues getValues(IndexReader reader) throws IOException { - final int[] arr = FieldCache.DEFAULT.getStringIndex(reader, field).order; + final DocTermsIndex termsIndex = FieldCache.DEFAULT.getTermsIndex(reader, field); return new DocValues() { /*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#floatVal(int) */ @Override public float floatVal(int doc) { - return arr[doc]; + return termsIndex.getOrd(doc); } /*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#strVal(int) */ @Override public String strVal(int doc) { // the string value of the ordinal, not the string itself - return Integer.toString(arr[doc]); + return Integer.toString(termsIndex.getOrd(doc)); } /*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#toString(int) */ @Override @@ -90,7 +91,7 @@ /*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#getInnerArray() */ @Override Object getInnerArray() { - return arr; + return termsIndex; } }; } Index: lucene/src/java/org/apache/lucene/search/FieldComparator.java =================================================================== --- lucene/src/java/org/apache/lucene/search/FieldComparator.java (revision 947841) +++ 
lucene/src/java/org/apache/lucene/search/FieldComparator.java (working copy) @@ -28,7 +28,9 @@ import org.apache.lucene.search.FieldCache.FloatParser; import org.apache.lucene.search.FieldCache.IntParser; import org.apache.lucene.search.FieldCache.ShortParser; -import org.apache.lucene.search.FieldCache.StringIndex; +import org.apache.lucene.search.FieldCache.DocTermsIndex; +import org.apache.lucene.search.FieldCache.DocTerms; +import org.apache.lucene.util.BytesRef; /** * Expert: a FieldComparator compares hits so as to determine their @@ -616,14 +618,19 @@ } /** Sorts by a field's value using the Collator for a - * given Locale.*/ + * given Locale. + * + *
WARNING: this is likely very slow; you'll + * get much better performance using the + * CollationKeyAnalyzer or ICUCollationKeyAnalyzer. */ public static final class StringComparatorLocale extends FieldComparator { private final String[] values; - private String[] currentReaderValues; + private DocTerms currentDocTerms; private final String field; final Collator collator; private String bottom; + private final BytesRef tempBR = new BytesRef(); StringComparatorLocale(int numHits, String field, Locale locale) { values = new String[numHits]; @@ -648,7 +655,7 @@ @Override public int compareBottom(int doc) { - final String val2 = currentReaderValues[doc]; + final String val2 = currentDocTerms.get(doc, tempBR).utf8ToString(); if (bottom == null) { if (val2 == null) { return 0; @@ -662,12 +669,17 @@ @Override public void copy(int slot, int doc) { - values[slot] = currentReaderValues[doc]; + final BytesRef br = currentDocTerms.get(doc, tempBR); + if (br == null) { + values[slot] = null; + } else { + values[slot] = br.utf8ToString(); + } } @Override public void setNextReader(IndexReader reader, int docBase) throws IOException { - currentReaderValues = FieldCache.DEFAULT.getStrings(reader, field); + currentDocTerms = FieldCache.DEFAULT.getTerms(reader, field); } @Override @@ -677,39 +689,40 @@ @Override public Comparable value(int slot) { - return values[slot]; + final String s = values[slot]; + return s == null ? null : new BytesRef(values[slot]); } } - /** Sorts by field's natural String sort order, using + /** Sorts by field's natural Term sort order, using * ordinals. This is functionally equivalent to {@link - * StringValComparator}, but it first resolves the string + * TermValComparator}, but it first resolves the string * to their relative ordinal positions (using the index * returned by {@link FieldCache#getStringIndex}), and * does most comparisons using the ordinals. For medium * to large results, this comparator will be much faster - * than {@link StringValComparator}. For very small + * than {@link TermValComparator}. For very small * result sets it may be slower. 
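The renamed comparator that follows keeps two things per slot, the segment-local ord and the term bytes: slots filled under the same reader generation compare by ord alone, while slots from different generations (or tied ords) fall back to comparing BytesRef values, with null (missing) sorting first. A hypothetical distillation of that two-level decision:

import org.apache.lucene.util.BytesRef;

// Hypothetical distillation of the ord-then-bytes compare used by TermOrdValComparator.
class OrdThenBytesCompare {
  static int compare(int gen1, int ord1, BytesRef val1,
                     int gen2, int ord2, BytesRef val2) {
    if (gen1 == gen2) {
      final int cmp = ord1 - ord2;
      if (cmp != 0) {
        return cmp; // same generation: ords order exactly like the terms
      }
    }
    // cross-generation slots (or tied ords): compare the term bytes themselves
    if (val1 == null) {
      return val2 == null ? 0 : -1; // missing values sort first
    } else if (val2 == null) {
      return 1;
    }
    return val1.compareTo(val2);
  }
}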
- public static final class StringOrdValComparator extends FieldComparator { + public static final class TermOrdValComparator extends FieldComparator { private final int[] ords; - private final String[] values; + private final BytesRef[] values; private final int[] readerGen; private int currentReaderGen = -1; - private String[] lookup; - private int[] order; + private DocTermsIndex termsIndex; private final String field; private int bottomSlot = -1; private int bottomOrd; - private String bottomValue; + private BytesRef bottomValue; private final boolean reversed; private final int sortPos; + private final BytesRef tempBR = new BytesRef(); - public StringOrdValComparator(int numHits, String field, int sortPos, boolean reversed) { + public TermOrdValComparator(int numHits, String field, int sortPos, boolean reversed) { ords = new int[numHits]; - values = new String[numHits]; + values = new BytesRef[numHits]; readerGen = new int[numHits]; this.sortPos = sortPos; this.reversed = reversed; @@ -725,8 +738,8 @@ } } - final String val1 = values[slot1]; - final String val2 = values[slot2]; + final BytesRef val1 = values[slot1]; + final BytesRef val2 = values[slot2]; if (val1 == null) { if (val2 == null) { return 0; @@ -741,47 +754,48 @@ @Override public int compareBottom(int doc) { assert bottomSlot != -1; - int order = this.order[doc]; + int order = termsIndex.getOrd(doc); final int cmp = bottomOrd - order; if (cmp != 0) { return cmp; } - final String val2 = lookup[order]; if (bottomValue == null) { - if (val2 == null) { + if (order == 0) { + // unset return 0; } // bottom wins return -1; - } else if (val2 == null) { + } else if (order == 0) { // doc wins return 1; } - return bottomValue.compareTo(val2); + termsIndex.get(order, tempBR); + return bottomValue.compareTo(tempBR); } private void convert(int slot) { readerGen[slot] = currentReaderGen; int index = 0; - String value = values[slot]; + BytesRef value = values[slot]; if (value == null) { - ords[slot] = 0; + // 0 ord is null for all segments + assert ords[slot] == 0; return; } if (sortPos == 0 && bottomSlot != -1 && bottomSlot != slot) { // Since we are the primary sort, the entries in the // queue are bounded by bottomOrd: - assert bottomOrd < lookup.length; if (reversed) { - index = binarySearch(lookup, value, bottomOrd, lookup.length-1); + index = binarySearch(tempBR, termsIndex, value, bottomOrd, termsIndex.numOrd()-1); } else { - index = binarySearch(lookup, value, 0, bottomOrd); + index = binarySearch(tempBR, termsIndex, value, 0, bottomOrd); } } else { // Full binary search - index = binarySearch(lookup, value); + index = binarySearch(tempBR, termsIndex, value); } if (index < 0) { @@ -792,20 +806,24 @@ @Override public void copy(int slot, int doc) { - final int ord = order[doc]; - ords[slot] = ord; - assert ord >= 0; - values[slot] = lookup[ord]; + final int ord = termsIndex.getOrd(doc); + if (ord == 0) { + ords[slot] = 0; + values[slot] = null; + } else { + ords[slot] = ord; + assert ord >= 0; + if (values[slot] == null) { + values[slot] = new BytesRef(); + } + termsIndex.get(ord, values[slot]); + } readerGen[slot] = currentReaderGen; } @Override public void setNextReader(IndexReader reader, int docBase) throws IOException { - StringIndex currentReaderValues = FieldCache.DEFAULT.getStringIndex(reader, field); + termsIndex = FieldCache.DEFAULT.getTermsIndex(reader, field); currentReaderGen++; - order = currentReaderValues.order; - lookup = currentReaderValues.lookup; - assert lookup.length > 0; if (bottomSlot != -1) { convert(bottomSlot); bottomOrd = ords[bottomSlot];
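To illustrate what convert() above has to do when the search moves to a new segment: the cached ords become meaningless, so each slot's term is re-resolved against the new segment's sorted term dictionary by binary search. A sketch in plain Java, with hypothetical data:

    // the new segment's terms, sorted; ord 0 is reserved for "unset"
    String[] newSegmentTerms = {null, "apple", "pear", "zebra"};
    String slotValue = "pear";
    int low = 1, high = newSegmentTerms.length-1, ord = -1;
    while (low <= high) {
      int mid = (low + high) >>> 1;
      int cmp = newSegmentTerms[mid].compareTo(slotValue);
      if (cmp < 0) low = mid+1;
      else if (cmp > 0) high = mid-1;
      else { ord = mid; break; }  // found: ord == 2
    }
    // when the term is absent, convert() uses the insertion point instead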
@@ -819,8 +837,6 @@ convert(bottomSlot); } bottomOrd = ords[bottom]; - assert bottomOrd >= 0; - assert bottomOrd < lookup.length; bottomValue = values[bottom]; } @@ -829,10 +845,6 @@ return values[slot]; } - public String[] getValues() { - return values; - } - public int getBottomSlot() { return bottomSlot; } @@ -842,26 +854,27 @@ } } - /** Sorts by field's natural String sort order. All - * comparisons are done using String.compareTo, which is + /** Sorts by field's natural Term sort order. All + * comparisons are done using BytesRef.compareTo, which is * slow for medium to large result sets but possibly * very fast for very small result sets. */ - public static final class StringValComparator extends FieldComparator { + public static final class TermValComparator extends FieldComparator { - private String[] values; - private String[] currentReaderValues; + private BytesRef[] values; + private DocTerms docTerms; private final String field; - private String bottom; + private BytesRef bottom; + private final BytesRef tempBR = new BytesRef(); - StringValComparator(int numHits, String field) { - values = new String[numHits]; + TermValComparator(int numHits, String field) { + values = new BytesRef[numHits]; this.field = field; } @Override public int compare(int slot1, int slot2) { - final String val1 = values[slot1]; - final String val2 = values[slot2]; + final BytesRef val1 = values[slot1]; + final BytesRef val2 = values[slot2]; if (val1 == null) { if (val2 == null) { return 0; @@ -876,7 +889,7 @@ @Override public int compareBottom(int doc) { - final String val2 = currentReaderValues[doc]; + BytesRef val2 = docTerms.get(doc, tempBR); if (bottom == null) { if (val2 == null) { return 0; @@ -890,12 +903,15 @@ @Override public void copy(int slot, int doc) { - values[slot] = currentReaderValues[doc]; + if (values[slot] == null) { + values[slot] = new BytesRef(); + } + if (docTerms.get(doc, values[slot]) == null) { + // no value for this doc + values[slot] = null; + } } @Override public void setNextReader(IndexReader reader, int docBase) throws IOException { - currentReaderValues = FieldCache.DEFAULT.getStrings(reader, field); + docTerms = FieldCache.DEFAULT.getTerms(reader, field); } @Override @@ -909,15 +925,15 @@ } } - final protected static int binarySearch(String[] a, String key) { - return binarySearch(a, key, 0, a.length-1); + final protected static int binarySearch(BytesRef br, DocTermsIndex a, BytesRef key) { + return binarySearch(br, a, key, 1, a.numOrd()-1); } - final protected static int binarySearch(String[] a, String key, int low, int high) { + final protected static int binarySearch(BytesRef br, DocTermsIndex a, BytesRef key, int low, int high) { while (low <= high) { int mid = (low + high) >>> 1; - String midVal = a[mid]; + BytesRef midVal = a.get(mid, br); int cmp; if (midVal != null) { cmp = midVal.compareTo(key);
Index: lucene/src/java/org/apache/lucene/search/SortField.java =================================================================== --- lucene/src/java/org/apache/lucene/search/SortField.java (revision 947841) +++ lucene/src/java/org/apache/lucene/search/SortField.java (working copy) @@ -413,10 +413,10 @@ return comparatorSource.newComparator(field, numHits, sortPos, reverse); case SortField.STRING: - return new FieldComparator.StringOrdValComparator(numHits, field, sortPos, reverse); + return new FieldComparator.TermOrdValComparator(numHits, field, sortPos, reverse); case SortField.STRING_VAL: - return new FieldComparator.StringValComparator(numHits, field); + return new FieldComparator.TermValComparator(numHits,
field); default: throw new IllegalStateException("Illegal sort type: " + type); Index: lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java =================================================================== --- lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java (revision 947841) +++ lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java (working copy) @@ -30,7 +30,12 @@ import org.apache.lucene.index.Terms; import org.apache.lucene.index.MultiFields; import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.util.PagedBytes; +import org.apache.lucene.util.RamUsageEstimator; +import org.apache.lucene.util.packed.PackedInts; +import org.apache.lucene.util.packed.GrowableWriter; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.Bits; import org.apache.lucene.util.StringHelper; import org.apache.lucene.util.FieldCacheSanityChecker; @@ -57,8 +62,8 @@ caches.put(Float.TYPE, new FloatCache(this)); caches.put(Long.TYPE, new LongCache(this)); caches.put(Double.TYPE, new DoubleCache(this)); - caches.put(String.class, new StringCache(this)); - caches.put(StringIndex.class, new StringIndexCache(this)); + caches.put(DocTermsIndex.class, new DocTermsIndexCache(this)); + caches.put(DocTerms.class, new DocTermsCache(this)); } public synchronized void purgeAllCaches() { @@ -638,78 +643,216 @@ } } - // inherit javadocs - public String[] getStrings(IndexReader reader, String field) - throws IOException { - return (String[]) caches.get(String.class).get(reader, new Entry(field, (Parser)null)); + private static class DocTermsIndexImpl extends DocTermsIndex { + private final PagedBytes.Reader bytes; + private final PackedInts.Reader termOrdToBytesOffset; + private final PackedInts.Reader docToTermOrd; + private final int numOrd; + + public DocTermsIndexImpl(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, PackedInts.Reader docToTermOrd, int numOrd) { + this.bytes = bytes; + this.docToTermOrd = docToTermOrd; + this.termOrdToBytesOffset = termOrdToBytesOffset; + this.numOrd = numOrd; + } + + @Override + public int numOrd() { + return numOrd; + } + + @Override + public int getOrd(int docID) { + return (int) docToTermOrd.get(docID); + } + + @Override + public int size() { + return docToTermOrd.size(); + } + + @Override + public BytesRef get(int ord, BytesRef ret) { + // nocommit can we somehow not have to do this if + // here...? make it caller's job? 
+ if (ord == 0) { + return null; + } else { + return bytes.fillUsingLengthPrefix(ret, termOrdToBytesOffset.get(ord)); + } + } } - static final class StringCache extends Cache { - StringCache(FieldCache wrapper) { + public DocTermsIndex getTermsIndex(IndexReader reader, String field) throws IOException { + return (DocTermsIndex) caches.get(DocTermsIndex.class).get(reader, new Entry(field, (Parser)null)); + } + + static final class DocTermsIndexCache extends Cache { + DocTermsIndexCache(FieldCache wrapper) { super(wrapper); } @Override protected Object createValue(IndexReader reader, Entry entryKey) throws IOException { + String field = StringHelper.intern(entryKey.field); - final String[] retArray = new String[reader.maxDoc()]; + Terms terms = MultiFields.getTerms(reader, field); - Terms terms = MultiFields.getTerms(reader, field); + final PagedBytes bytes = new PagedBytes(15); + + int startBytesBPV; + int startTermsBPV; + int startNumUniqueTerms; + if (terms != null) { + // Try for coarse estimate for number of bits; this + // should be an underestimate most of the time, which + // is fine -- GrowableWriter will reallocate as needed + long numUniqueTerms = 0; + try { + numUniqueTerms = terms.getUniqueTermCount(); + } catch (UnsupportedOperationException uoe) { + numUniqueTerms = -1; + } + if (numUniqueTerms != -1) { + startBytesBPV = PackedInts.bitsRequired(numUniqueTerms*4); + startTermsBPV = PackedInts.bitsRequired(numUniqueTerms); + // nocommit -- what to do if numUniqueTerms > Integer.MAX_VALUE??? + startNumUniqueTerms = (int) numUniqueTerms; + } else { + startBytesBPV = 1; + startTermsBPV = 1; + startNumUniqueTerms = 1; + } + } else { + startBytesBPV = 1; + startTermsBPV = 1; + startNumUniqueTerms = 1; + } + + GrowableWriter termOrdToBytesOffset = new GrowableWriter(startBytesBPV, 1+startNumUniqueTerms); + final GrowableWriter docToTermOrd = new GrowableWriter(startTermsBPV, reader.maxDoc()); + + // 0 is reserved for "unset" + bytes.copyUsingLengthPrefix(new BytesRef()); + int termOrd = 1; + + if (terms != null) { final TermsEnum termsEnum = terms.iterator(); final Bits delDocs = MultiFields.getDeletedDocs(reader); DocsEnum docs = null; + while(true) { final BytesRef term = termsEnum.next(); if (term == null) { break; } + if (termOrd == termOrdToBytesOffset.size()) { + // NOTE: this code only runs if the incoming + // reader impl doesn't implement + // getUniqueTermCount (which should be uncommon) + termOrdToBytesOffset = termOrdToBytesOffset.resize(ArrayUtil.oversize(1+termOrd, 1)); + } + termOrdToBytesOffset.set(termOrd, bytes.copyUsingLengthPrefix(term)); docs = termsEnum.docs(delDocs, docs); - final String termval = term.utf8ToString(); while (true) { final int docID = docs.nextDoc(); if (docID == DocsEnum.NO_MORE_DOCS) { break; } - retArray[docID] = termval; + docToTermOrd.set(docID, termOrd); } + termOrd++; } + + if (termOrdToBytesOffset.size() > termOrd) { + termOrdToBytesOffset = termOrdToBytesOffset.resize(termOrd); + }
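A hedged usage sketch of the DocTermsIndex entry points defined above (the reader, docID, and field name "f" are hypothetical):

    FieldCache.DocTermsIndex idx = FieldCache.DEFAULT.getTermsIndex(reader, "f");
    BytesRef scratch = new BytesRef();
    int ord = idx.getOrd(docID);
    if (ord == 0) {
      // ord 0 is reserved for documents with no value in this field
    } else {
      BytesRef term = idx.get(ord, scratch);   // points at the cached bytes
      String text = term.utf8ToString();       // costly; prefer the BytesRef
    }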
} - return retArray; + + // maybe an int-only impl? + return new DocTermsIndexImpl(bytes.freeze(), termOrdToBytesOffset.getMutable(), docToTermOrd.getMutable(), termOrd); } } - // inherit javadocs - public StringIndex getStringIndex(IndexReader reader, String field) - throws IOException { - return (StringIndex) caches.get(StringIndex.class).get(reader, new Entry(field, (Parser)null)); + private static class DocTermsImpl extends DocTerms { + private final PagedBytes.Reader bytes; + private final PackedInts.Reader docToOffset; + + public DocTermsImpl(PagedBytes.Reader bytes, PackedInts.Reader docToOffset) { + this.bytes = bytes; + this.docToOffset = docToOffset; + } + + @Override + public int size() { + return docToOffset.size(); + } + + @Override + public BytesRef get(int docID, BytesRef ret) { + final long pointer = docToOffset.get(docID); + // nocommit -- can we somehow not do this? maybe + // expose separate method isSet so those uses that + // really can't accept empty string have an out? + if (pointer == 0) { + return null; + } else { + return bytes.fillUsingLengthPrefix(ret, pointer); + } + } } - static final class StringIndexCache extends Cache { - StringIndexCache(FieldCache wrapper) { + // TODO: if the DocTermsIndex was already created, we + // should share it... + public DocTerms getTerms(IndexReader reader, String field) throws IOException { + return (DocTerms) caches.get(DocTerms.class).get(reader, new Entry(field, (Parser)null)); + } + + static final class DocTermsCache extends Cache { + DocTermsCache(FieldCache wrapper) { super(wrapper); } @Override protected Object createValue(IndexReader reader, Entry entryKey) throws IOException { + String field = StringHelper.intern(entryKey.field); - final int[] retArray = new int[reader.maxDoc()]; - String[] mterms = new String[reader.maxDoc()+1]; - - //System.out.println("FC: getStringIndex field=" + field); Terms terms = MultiFields.getTerms(reader, field); - int t = 0; // current term number + // Holds the actual term data, expanded. + final PagedBytes bytes = new PagedBytes(15); - // an entry for documents that have no terms in this field - // should a document with no terms be at top or bottom? - // this puts them at the top - if it is changed, FieldDocSortedHitQueue - // needs to change as well.
- mterms[t++] = null; + int startBPV; if (terms != null) { + // Try for coarse estimate for number of bits; this + // should be an underestimate most of the time, which + // is fine -- GrowableWriter will reallocate as needed + long numUniqueTerms = 0; + try { + numUniqueTerms = terms.getUniqueTermCount(); + } catch (UnsupportedOperationException uoe) { + numUniqueTerms = -1; + } + if (numUniqueTerms != -1) { + startBPV = PackedInts.bitsRequired(numUniqueTerms*4); + } else { + startBPV = 1; + } + } else { + startBPV = 1; + } + + final GrowableWriter docToOffset = new GrowableWriter(startBPV, reader.maxDoc()); + + // pointer==0 means not set + bytes.copyUsingLengthPrefix(new BytesRef()); + + if (terms != null) { final TermsEnum termsEnum = terms.iterator(); final Bits delDocs = MultiFields.getDeletedDocs(reader); DocsEnum docs = null; @@ -718,42 +861,22 @@ if (term == null) { break; } - - // store term text - mterms[t] = term.utf8ToString(); - //System.out.println("FC: ord=" + t + " term=" + term.toBytesString()); - + final long pointer = bytes.copyUsingLengthPrefix(term); docs = termsEnum.docs(delDocs, docs); while (true) { final int docID = docs.nextDoc(); if (docID == DocsEnum.NO_MORE_DOCS) { break; } - //System.out.println("FC: docID=" + docID); - retArray[docID] = t; + docToOffset.set(docID, pointer); } - t++; } } - if (t == 0) { - // if there are no terms, make the term array - // have a single null entry - mterms = new String[1]; - } else if (t < mterms.length) { - // if there are less terms than documents, - // trim off the dead array space - String[] newTerms = new String[t]; - System.arraycopy (mterms, 0, newTerms, 0, t); - mterms = newTerms; - } - - StringIndex value = new StringIndex (retArray, mterms); - //System.out.println("FC: done\n"); - return value; + // maybe an int-only impl? 
+ return new DocTermsImpl(bytes.freeze(), docToOffset.getMutable()); } } - private volatile PrintStream infoStream; public void setInfoStream(PrintStream stream) {
Index: lucene/src/java/org/apache/lucene/index/DocumentsWriter.java =================================================================== --- lucene/src/java/org/apache/lucene/index/DocumentsWriter.java (revision 947841) +++ lucene/src/java/org/apache/lucene/index/DocumentsWriter.java (working copy) @@ -812,7 +812,7 @@ boolean updateDocument(Document doc, Analyzer analyzer, Term delTerm) throws CorruptIndexException, IOException { - + // This call is synchronized but fast final DocumentsWriterThreadState state = getThreadState(doc, delTerm); @@ -1287,6 +1287,8 @@ final static int BYTE_BLOCK_MASK = BYTE_BLOCK_SIZE - 1; final static int BYTE_BLOCK_NOT_MASK = ~BYTE_BLOCK_MASK; + /* if you increase this, you must fix the field cache impl: + * getTerms/getTermsIndex requires terms <= 32768 bytes */ final static int MAX_TERM_LENGTH_UTF8 = BYTE_BLOCK_SIZE-2; private class ByteBlockAllocator extends ByteBlockPool.Allocator {
Index: lucene/src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexReader.java =================================================================== --- lucene/src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexReader.java (revision 947841) +++ lucene/src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexReader.java (working copy) @@ -24,6 +24,7 @@ import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CodecUtil; +import org.apache.lucene.util.PagedBytes; import org.apache.lucene.util.packed.PackedInts; import java.util.HashMap; @@ -82,6 +83,7 @@ // all fields share this single logical byte[] private final PagedBytes termBytes = new PagedBytes(PAGED_BYTES_BITS); + private PagedBytes.Reader termBytesReader; final HashMap fields = new HashMap(); @@ -135,7 +137,7 @@ if (success) { indexLoaded = true; } - termBytes.finish(); + termBytesReader = termBytes.freeze(); } else { this.in = in; } @@ -347,7 +349,7 @@ private final void fillResult(int idx, TermsIndexResult result) { final long offset = termOffsets.get(idx); final int length = (int) (termOffsets.get(1+idx) - offset); - termBytes.fill(result.term, termBytesStart + offset, length); + termBytesReader.fill(result.term, termBytesStart + offset, length); result.position = idx * totalIndexInterval; result.offset = termsStart + termsDictOffsets.get(idx); } @@ -361,7 +363,7 @@ final long offset = termOffsets.get(mid); final int length = (int) (termOffsets.get(1+mid) - offset); - termBytes.fill(result.term, termBytesStart + offset, length); + termBytesReader.fill(result.term, termBytesStart + offset, length); int delta = termComp.compare(term, result.term); if (delta < 0) { @@ -382,7 +384,7 @@ final long offset = termOffsets.get(hi); final int length = (int) (termOffsets.get(1+hi) - offset); - termBytes.fill(result.term, termBytesStart + offset, length); + termBytesReader.fill(result.term, termBytesStart + offset, length); result.position = hi*totalIndexInterval; result.offset = termsStart + termsDictOffsets.get(hi); @@ -411,7 +413,7 @@ indexLoaded = true; in.close(); - termBytes.finish(); + termBytesReader = termBytes.freeze(); } } @@ -438,5 +440,8 @@ if (in != null && !indexLoaded) { in.close(); } + if (termBytesReader != null) { + termBytesReader.close(); + } } }
Index: lucene/src/java/org/apache/lucene/index/codecs/standard/PagedBytes.java
=================================================================== --- lucene/src/java/org/apache/lucene/index/codecs/standard/PagedBytes.java (revision 947841) +++ lucene/src/java/org/apache/lucene/index/codecs/standard/PagedBytes.java (working copy) @@ -1,129 +0,0 @@ -package org.apache.lucene.index.codecs.standard; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import org.apache.lucene.util.CloseableThreadLocal; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.ArrayUtil; -import org.apache.lucene.store.IndexInput; - -import java.util.List; -import java.util.ArrayList; -import java.io.Closeable; -import java.io.IOException; - -/** Represents a logical byte[] as a series of pages. You - * can write-once into the logical byte[], using copy, and - * then retrieve slices (BytesRef) into it using fill. */ -class PagedBytes implements Closeable { - private final List blocks = new ArrayList(); - private final int blockSize; - private final int blockBits; - private final int blockMask; - private int upto; - private byte[] currentBlock; - private final CloseableThreadLocal threadBuffers = new CloseableThreadLocal(); - - private static final byte[] EMPTY_BYTES = new byte[0]; - - /** 1< 0) { - int left = blockSize - upto; - if (left == 0) { - if (currentBlock != null) { - blocks.add(currentBlock); - } - currentBlock = new byte[blockSize]; - upto = 0; - left = blockSize; - } - if (left < byteCount) { - in.readBytes(currentBlock, upto, left, false); - upto = blockSize; - byteCount -= left; - } else { - in.readBytes(currentBlock, upto, (int) byteCount, false); - upto += byteCount; - byteCount = 0; - } - } - } - - /** Commits final byte[], trimming it if necessary. */ - public void finish() { - if (upto < blockSize) { - final byte[] newBlock = new byte[upto]; - System.arraycopy(currentBlock, 0, newBlock, 0, upto); - currentBlock = newBlock; - } - if (currentBlock == null) { - currentBlock = EMPTY_BYTES; - } - blocks.add(currentBlock); - currentBlock = null; - } - - public long getPointer() { - if (currentBlock == null) { - return 0; - } else { - return (blocks.size() * ((long) blockSize)) + upto; - } - } - - /** Get a slice out of the byte array. 
*/ - public void fill(BytesRef b, long start, int length) { - assert length >= 0: "length=" + length; - final int index = (int) (start >> blockBits); - final int offset = (int) (start & blockMask); - b.length = length; - if (blockSize - offset >= length) { - // Within block - b.bytes = blocks.get(index); - b.offset = offset; - } else { - // Split - byte[] buffer = threadBuffers.get(); - if (buffer == null) { - buffer = new byte[length]; - threadBuffers.set(buffer); - } else if (buffer.length < length) { - buffer = ArrayUtil.grow(buffer, length); - threadBuffers.set(buffer); - } - b.bytes = buffer; - b.offset = 0; - System.arraycopy(blocks.get(index), offset, buffer, 0, blockSize-offset); - System.arraycopy(blocks.get(1+index), 0, buffer, blockSize-offset, length-(blockSize-offset)); - } - } - - public void close() { - threadBuffers.close(); - } -}
Index: lucene/src/java/org/apache/lucene/util/PagedBytes.java =================================================================== --- lucene/src/java/org/apache/lucene/util/PagedBytes.java (revision 944313) +++ lucene/src/java/org/apache/lucene/util/PagedBytes.java (working copy) @@ -1,4 +1,4 @@ -package org.apache.lucene.index.codecs.standard; +package org.apache.lucene.util; /** * Licensed to the Apache Software Foundation (ASF) under one or more @@ -28,19 +28,88 @@ import java.io.IOException; /** Represents a logical byte[] as a series of pages. You - * can write-once into the logical byte[], using copy, and - * then retrieve slices (BytesRef) into it using fill. */ -class PagedBytes implements Closeable { + * can write-once into the logical byte[] (append only), + * using copy, and then retrieve slices (BytesRef) into it + * using fill. + * + * @lucene.internal + */ +public final class PagedBytes { private final List<byte[]> blocks = new ArrayList<byte[]>(); private final int blockSize; private final int blockBits; private final int blockMask; private int upto; private byte[] currentBlock; - private final CloseableThreadLocal<byte[]> threadBuffers = new CloseableThreadLocal<byte[]>(); private static final byte[] EMPTY_BYTES = new byte[0]; + public final static class Reader implements Closeable { + private final byte[][] blocks; + private final int blockBits; + private final int blockMask; + private final int blockSize; + private final CloseableThreadLocal<byte[]> threadBuffers = new CloseableThreadLocal<byte[]>(); + + public Reader(PagedBytes pagedBytes) { + blocks = new byte[pagedBytes.blocks.size()][]; + for(int i=0;i<blocks.length;i++) { + blocks[i] = pagedBytes.blocks.get(i); + } + blockBits = pagedBytes.blockBits; + blockMask = pagedBytes.blockMask; + blockSize = pagedBytes.blockSize; + } + + /** Get a slice out of the byte array. */ + public BytesRef fill(BytesRef b, long start, int length) { + assert length >= 0: "length=" + length; + final int index = (int) (start >> blockBits); + final int offset = (int) (start & blockMask); + b.length = length; + if (blockSize - offset >= length) { + // Within block + b.bytes = blocks[index]; + b.offset = offset; + } else { + // Split + byte[] buffer = threadBuffers.get(); + if (buffer == null) { + buffer = new byte[length]; + threadBuffers.set(buffer); + } else if (buffer.length < length) { + buffer = ArrayUtil.grow(buffer, length); + threadBuffers.set(buffer); + } + b.bytes = buffer; + b.offset = 0; + System.arraycopy(blocks[index], offset, buffer, 0, blockSize-offset); + System.arraycopy(blocks[1+index], 0, buffer, blockSize-offset, length-(blockSize-offset)); + } + return b; + } + + /** Reads length as 1 or 2 byte vInt prefix, starting @ start */ + public BytesRef fillUsingLengthPrefix(BytesRef b, long start) { + final int index = (int) (start >> blockBits); + final int offset = (int) (start & blockMask); + final byte[] block = b.bytes = blocks[index]; + + if ((block[offset] & 128) == 0) { + b.length = block[offset]; + b.offset = offset+1; + } else { + b.length = (((int) (block[offset] & 0x7f)) << 8) | (block[1+offset] & 0xff); + b.offset = offset+2; + assert b.length > 0; + } + return b; + } + + public void close() { + threadBuffers.close(); + } + } + /** 1<<blockBits must be bigger than biggest single + * BytesRef slice that will be pulled */ public PagedBytes(int blockBits) { this.blockSize = 1 << blockBits; this.blockBits = blockBits; blockMask = blockSize-1; upto = blockSize; } + /** Copy BytesRef in */ + public void copy(BytesRef bytes) throws IOException { + long byteCount = bytes.length; + int bytesUpto = bytes.offset; + while (byteCount > 0) { + int left = blockSize - upto; + if (left == 0) { + if (currentBlock != null) { + blocks.add(currentBlock); + } + currentBlock = new byte[blockSize]; + upto = 0; + left = blockSize; + } + if (left < byteCount) { + System.arraycopy(bytes.bytes, bytesUpto, currentBlock, upto, left); + upto = blockSize; + byteCount -= left; + bytesUpto += left; + } else { + System.arraycopy(bytes.bytes, bytesUpto, currentBlock, upto, (int) byteCount); + upto += byteCount; + break; + } + } + } + /** Commits final byte[], trimming it if necessary. */ - public void finish() { + public Reader freeze() { if (upto < blockSize) { final byte[] newBlock = new byte[upto]; System.arraycopy(currentBlock, 0, newBlock, 0, upto); @@ -86,6 +182,7 @@ } blocks.add(currentBlock); currentBlock = null; + return new Reader(this); } public long getPointer() {
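A sketch of the write-once/freeze/read cycle the new PagedBytes is built around (values are hypothetical; copyUsingLengthPrefix may throw IOException, handling omitted):

    PagedBytes pages = new PagedBytes(15);                 // 32 KB blocks
    long ptr = pages.copyUsingLengthPrefix(new BytesRef("hello"));
    PagedBytes.Reader pagesReader = pages.freeze();        // no more writes
    BytesRef slice = pagesReader.fillUsingLengthPrefix(new BytesRef(), ptr);
    // slice references the stored bytes directly; no copy is made for
    // slices that do not span a block boundary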
@@ -96,34 +193,32 @@ - /** Get a slice out of the byte array. */ - public void fill(BytesRef b, long start, int length) { - assert length >= 0: "length=" + length; - final int index = (int) (start >> blockBits); - final int offset = (int) (start & blockMask); - b.length = length; - if (blockSize - offset >= length) { - // Within block - b.bytes = blocks.get(index); - b.offset = offset; - } else { - // Split - byte[] buffer = threadBuffers.get(); - if (buffer == null) { - buffer = new byte[length]; - threadBuffers.set(buffer); - } else if (buffer.length < length) { - buffer = ArrayUtil.grow(buffer, length); - threadBuffers.set(buffer); + /** Copy bytes in, writing the length as a 1 or 2 byte + * vInt prefix. */ + public long copyUsingLengthPrefix(BytesRef bytes) throws IOException { + + if (upto + bytes.length + 2 > blockSize) { + if (bytes.length + 2 > blockSize) { + throw new IllegalArgumentException("block size " + blockSize + " is too small to store length " + bytes.length + " bytes"); } - b.bytes = buffer; - b.offset = 0; - System.arraycopy(blocks.get(index), offset, buffer, 0, blockSize-offset); - System.arraycopy(blocks.get(1+index), 0, buffer, blockSize-offset, length-(blockSize-offset)); + if (currentBlock != null) { + blocks.add(currentBlock); + } + currentBlock = new byte[blockSize]; + upto = 0; } - } - public void close() { - threadBuffers.close(); + final long pointer = getPointer(); + + if (bytes.length < 128) { + currentBlock[upto++] = (byte) bytes.length; + } else { + currentBlock[upto++] = (byte) (0x80 | (bytes.length >> 8)); + currentBlock[upto++] = (byte) (bytes.length & 0xff); + } + System.arraycopy(bytes.bytes, bytes.offset, currentBlock, upto, bytes.length); + upto += bytes.length; + + return pointer; } }
Index: lucene/src/java/org/apache/lucene/util/packed/GrowableWriter.java =================================================================== --- lucene/src/java/org/apache/lucene/util/packed/GrowableWriter.java (revision 0) +++ lucene/src/java/org/apache/lucene/util/packed/GrowableWriter.java (revision 0) @@ -0,0 +1,84 @@ +package org.apache.lucene.util.packed; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +/** + * Implements {@link PackedInts.Mutable}, but grows the + * bit count of the underlying packed ints on-demand. + * + * @lucene.internal + */ + +public class GrowableWriter implements PackedInts.Mutable { + + private long currentMaxValue; + private PackedInts.Mutable current; + + public GrowableWriter(int startBitsPerValue, int valueCount) { + currentMaxValue = 1L << startBitsPerValue; + current = PackedInts.getMutable(valueCount, startBitsPerValue); + } + + public long get(int index) { + return current.get(index); + } + + public int size() { + return current.size(); + } + + public int getBitsPerValue() { + return current.getBitsPerValue(); + } + + public PackedInts.Mutable getMutable() { + return current; + } + + public void set(int index, long value) { + if (value >= currentMaxValue) { + int bpv = getBitsPerValue(); + while(currentMaxValue <= value) { + bpv++; + currentMaxValue *= 2; + } + final int valueCount = size(); + PackedInts.Mutable next = PackedInts.getMutable(valueCount, bpv); + for(int i=0;i<valueCount;i++) { + next.set(i, current.get(i)); + } + current = next; + } + current.set(index, value); + } + + public void clear() { + current.clear(); + } + + public GrowableWriter resize(int newSize) { + GrowableWriter next = new GrowableWriter(getBitsPerValue(), newSize); + final int limit = Math.min(size(), newSize); + for(int i=0;i<limit;i++) { + next.set(i, get(i)); + } + return next; + } +}
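A short sketch of how GrowableWriter is meant to be used (counts are hypothetical): start from a low bits-per-value estimate and let set() upgrade the storage when a larger value arrives:

    GrowableWriter w = new GrowableWriter(1, 100);  // 1 bit/value, 100 slots
    w.set(5, 1);       // fits in the current 1 bit per value
    w.set(6, 70000);   // transparently regrows to 17 bits per value
    PackedInts.Mutable packed = w.getMutable();     // fixed-width view
    long v = packed.get(6);                         // == 70000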
- * Note: The blocks are used directly, so changes to the given block will + * Note: The values are used directly, so changes to the given array will affect the structure. - * @param blocks used as the internal backing array. + * @param values used as the internal backing array. */ - public Direct8(byte[] blocks) { - super(blocks.length, BITS_PER_VALUE); - this.blocks = blocks; + public Direct8(byte[] values) { + super(values.length, BITS_PER_VALUE); + this.values = values; } public long get(final int index) { - return 0xFFL & blocks[index]; + return 0xFFL & values[index]; } public void set(final int index, final long value) { - blocks[index] = (byte)(value & 0xFF); + values[index] = (byte)(value & 0xFF); } public long ramBytesUsed() { - return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + blocks.length; + return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + values.length; } public void clear() { - Arrays.fill(blocks, (byte)0); + Arrays.fill(values, (byte)0); } }
Index: lucene/src/java/org/apache/lucene/util/packed/PackedInts.java =================================================================== --- lucene/src/java/org/apache/lucene/util/packed/PackedInts.java (revision 947841) +++ lucene/src/java/org/apache/lucene/util/packed/PackedInts.java (working copy) @@ -209,7 +209,7 @@ * @lucene.internal */ public static Mutable getMutable( - int valueCount, int bitsPerValue) throws IOException { + int valueCount, int bitsPerValue) { switch (bitsPerValue) { case 8: return new Direct8(valueCount);
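A hedged sketch of the PackedInts calls the field cache above depends on -- size the writer from the largest expected value, then pack (values are hypothetical):

    int maxValue = 1000;
    int bitsPerValue = PackedInts.bitsRequired(maxValue);     // 10 bits
    PackedInts.Mutable packed = PackedInts.getMutable(8, bitsPerValue);
    packed.set(3, 777);
    // packed.get(3) == 777, in a fraction of a long[]'s footprint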
Index: lucene/src/java/org/apache/lucene/util/BytesRef.java =================================================================== --- lucene/src/java/org/apache/lucene/util/BytesRef.java (revision 947841) +++ lucene/src/java/org/apache/lucene/util/BytesRef.java (working copy) @@ -19,12 +19,17 @@ import java.util.Comparator; import java.io.UnsupportedEncodingException; +import java.io.ObjectInput; +import java.io.ObjectOutput; +import java.io.Externalizable; +import java.io.IOException; /** Represents byte[], as a slice (offset + length) into an * existing byte[]. * * @lucene.experimental */ -public final class BytesRef implements Comparable<BytesRef> { +public final class BytesRef implements Comparable<BytesRef>, Externalizable { + public static final byte[] EMPTY_BYTES = new byte[0]; /** The contents of the BytesRef. Should never be {@code null}. */ @@ -316,4 +321,25 @@ return this == other; } } + + public void writeExternal(ObjectOutput out) throws IOException { + out.writeInt(length); + if (length > 0) { + out.write(bytes, offset, length); + } + } + + public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException { + length = in.readInt(); + offset = 0; + if (length > 0) { + bytes = new byte[length]; + in.readFully(bytes, 0, length); + } else { + // keep the "never null" contract; see EMPTY_BYTES above + bytes = EMPTY_BYTES; + } + } }
Index: lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java =================================================================== --- lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java (revision 947841) +++ lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java (working copy) @@ -47,9 +47,10 @@ import org.apache.lucene.index.SerialMergeScheduler; import org.apache.lucene.index.LogDocMergePolicy; import org.apache.lucene.index.TermFreqVector; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.store.Directory; -import org.apache.lucene.search.FieldCache.StringIndex; +import org.apache.lucene.search.FieldCache.DocTermsIndex; import org.apache.lucene.search.FieldCache; /** @@ -325,11 +326,12 @@ Benchmark benchmark = execBenchmark(algLines); IndexReader r = IndexReader.open(benchmark.getRunData().getDirectory(), true); - StringIndex idx = FieldCache.DEFAULT.getStringIndex(r, "country"); + DocTermsIndex idx = FieldCache.DEFAULT.getTermsIndex(r, "country"); final int maxDoc = r.maxDoc(); assertEquals(1000, maxDoc); + BytesRef br = new BytesRef(); for(int i=0;i<1000;i++) { - assertNotNull("doc " + i + " has null country", idx.lookup[idx.order[i]]); + assertNotNull("doc " + i + " has null country", idx.get(idx.getOrd(i), br)); } r.close(); }
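Since BytesRef is now Externalizable (see the BytesRef.java hunk above), a round trip through Java serialization looks like this (a sketch; the stream setup is plain JDK boilerplate, exception handling omitted):

    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    ObjectOutputStream out = new ObjectOutputStream(baos);
    out.writeObject(new BytesRef("term"));
    out.close();
    ObjectInputStream in = new ObjectInputStream(
        new ByteArrayInputStream(baos.toByteArray()));
    BytesRef back = (BytesRef) in.readObject();
    // back.utf8ToString() -> "term"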
Index: lucene/contrib/spatial/src/java/org/apache/lucene/spatial/geohash/GeoHashDistanceFilter.java =================================================================== --- lucene/contrib/spatial/src/java/org/apache/lucene/spatial/geohash/GeoHashDistanceFilter.java (revision 947841) +++ lucene/contrib/spatial/src/java/org/apache/lucene/spatial/geohash/GeoHashDistanceFilter.java (working copy) @@ -21,9 +21,11 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.FieldCache; +import org.apache.lucene.search.FieldCache.DocTerms; import org.apache.lucene.search.Filter; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.FilteredDocIdSet; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.spatial.tier.DistanceFilter; import org.apache.lucene.spatial.tier.DistanceUtils; @@ -62,7 +64,8 @@ @Override public DocIdSet getDocIdSet(IndexReader reader) throws IOException { - final String[] geoHashValues = FieldCache.DEFAULT.getStrings(reader, geoHashField); + final DocTerms geoHashValues = FieldCache.DEFAULT.getTerms(reader, geoHashField); + final BytesRef br = new BytesRef(); final int docBase = nextDocBase; nextDocBase += reader.maxDoc(); @@ -70,8 +73,10 @@ return new FilteredDocIdSet(startingFilter.getDocIdSet(reader)) { @Override public boolean match(int doc) { - - String geoHash = geoHashValues[doc]; + + // nocommit: cutover to BytesRef so we don't have to + // make String here? + String geoHash = geoHashValues.get(doc, br).utf8ToString(); double[] coords = GeoHashUtils.decode(geoHash); double x = coords[0]; double y = coords[1];
Index: lucene/MIGRATE.txt =================================================================== --- lucene/MIGRATE.txt (revision 0) +++ lucene/MIGRATE.txt (revision 0) @@ -0,0 +1,44 @@ + +LUCENE-2380 + + * The field values returned when sorting by SortField.STRING are now + BytesRef. You can call value.utf8ToString() to convert back to + string, if necessary. + + * In FieldCache, getStrings has been replaced with getTerms, and + getStringIndex has been replaced with getTermsIndex. + + DETAILS: + + In FieldCache, getStrings (returning String[]) has been replaced + with getTerms (returning DocTerms). DocTerms provides a get + method, taking a docID and a BytesRef to fill (which must not be + null), and it fills it in with the reference to the bytes for + that term. + + If you had code like this before: + + String[] values = FieldCache.DEFAULT.getStrings(reader, field); + ... + String aValue = values[docID]; + + you should do this instead: + + DocTerms values = FieldCache.DEFAULT.getTerms(reader, field); + ... + BytesRef term = new BytesRef(); + String aValue = values.get(docID, term).utf8ToString(); + + Note however that it can be costly to convert to String, so it's + better to work directly with the BytesRef. Also, because Lucene + now sorts terms by Unicode code point order (not UTF16 order, as + it did in 3.x), you should not use String.compareTo to compare + the Strings returned by FieldCache if your index contains any + characters outside of the Unicode BMP. + + * StringComparatorLocale is now more CPU costly than it was before, + since it converts BytesRef -> String on the fly. Also, the field + values returned when sorting by SortField.STRING are now + BytesRef. It's better to use + CollationKeyAnalyzer, to store collation keys in the index. + Property changes on: lucene/MIGRATE.txt ___________________________________________________________________ Added: svn:eol-style + native
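To complement MIGRATE.txt, a hedged sketch of reading the new BytesRef sort values from a search (the field name "title", the searcher, and the query are hypothetical):

    Sort sort = new Sort(new SortField("title", SortField.STRING));
    TopFieldDocs hits = searcher.search(query, null, 10, sort);
    FieldDoc fd = (FieldDoc) hits.scoreDocs[0];
    BytesRef value = (BytesRef) fd.fields[0];   // was a String in 3.x
    String text = value == null ? null : value.utf8ToString();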