Index: lucene/src/test/org/apache/lucene/index/TestAddIndexes.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestAddIndexes.java (revision 957599) +++ lucene/src/test/org/apache/lucene/index/TestAddIndexes.java (working copy) @@ -464,7 +464,7 @@ private void verifyTermDocs(Directory dir, Term term, int numDocs) throws IOException { IndexReader reader = IndexReader.open(dir, true); - DocsEnum docsEnum = MultiFields.getTermDocsEnum(reader, null, term.field, new BytesRef(term.text)); + DocsEnum docsEnum = MultiFields.getTermDocsEnum(reader, null, term.field, term.bytes); int count = 0; while (docsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) count++; Index: lucene/src/test/org/apache/lucene/index/TestPayloads.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestPayloads.java (revision 957599) +++ lucene/src/test/org/apache/lucene/index/TestPayloads.java (working copy) @@ -188,7 +188,7 @@ Term[] terms = generateTerms(fieldName, numTerms); StringBuilder sb = new StringBuilder(); for (int i = 0; i < terms.length; i++) { - sb.append(terms[i].text); + sb.append(terms[i].text()); sb.append(" "); } String content = sb.toString(); Index: lucene/src/test/org/apache/lucene/index/codecs/preflex/TestSurrogates.java =================================================================== --- lucene/src/test/org/apache/lucene/index/codecs/preflex/TestSurrogates.java (revision 957599) +++ lucene/src/test/org/apache/lucene/index/codecs/preflex/TestSurrogates.java (working copy) @@ -109,8 +109,20 @@ fieldInfos.write(dir, segName); // sorts in UTF16 order, just like preflex: - Collections.sort(terms); + Comparator utf16comparator = new Comparator() { + @Override + public int compare(Term o1, Term o2) { + // TODO: this test could run faster if we didnt create new strings with text() + // and instead used a utf8-in-utf16-order bytesref comparator. 
+ if (o1.field() == o2.field()) // fields are interned + return o1.text().compareTo(o2.text()); + else + return o1.field().compareTo(o2.field()); + } + }; + Collections.sort(terms, utf16comparator); + TermInfosWriter w = new TermInfosWriter(dir, segName, fieldInfos, 128); TermInfo ti = new TermInfo(); BytesRef utf8 = new BytesRef(10); Index: lucene/src/java/org/apache/lucene/index/Term.java =================================================================== --- lucene/src/java/org/apache/lucene/index/Term.java (revision 957599) +++ lucene/src/java/org/apache/lucene/index/Term.java (working copy) @@ -17,6 +17,7 @@ * limitations under the License. */ +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.StringHelper; /** @@ -29,15 +30,22 @@ public final class Term implements Comparable, java.io.Serializable { String field; - String text; + BytesRef bytes; /** Constructs a Term with the given field and text. *

Note that a null field or null text value results in undefined * behavior for most Lucene APIs that accept a Term parameter. */ - public Term(String fld, String txt) { + public Term(String fld, BytesRef bytes) { field = fld == null ? null : StringHelper.intern(fld); - text = txt; + this.bytes = bytes; } + + /** Constructs a Term with the given field and text. + *

Note that a null field or null text value results in undefined + * behavior for most Lucene APIs that accept a Term parameter. */ + public Term(String fld, String text) { + this(fld, new BytesRef(text)); + } /** Constructs a Term with the given field and empty text. * This serves two purposes: 1) reuse of a Term with the same field. @@ -46,15 +54,20 @@ * @param fld */ public Term(String fld) { - this(fld, "", true); + this(fld, BytesRef.EMPTY, true); } /** @lucene.experimental */ - public Term(String fld, String txt, boolean intern) { + public Term(String fld, BytesRef bytes, boolean intern) { field = intern ? StringHelper.intern(fld) : fld; // field names are interned - text = txt; // unless already known to be + this.bytes = bytes; // unless already known to be } + /** @lucene.experimental */ + public Term(String fld, String text, boolean intern) { + this(fld, new BytesRef(text), intern); + } + /** Returns the field of this term, an interned string. The field indicates the part of a document which this term came from. */ public final String field() { return field; } @@ -62,11 +75,25 @@ /** Returns the text of this term. In the case of words, this is simply the text of the word. In the case of dates and other types, this is an encoding of the object as a string. */ - public final String text() { return text; } - + public final String text() { return bytes.utf8ToString(); } + + /** Returns the bytes of this term. 
*/ + public final BytesRef bytes() { return bytes; } + /** * Optimized construction of new Terms by reusing same field as this Term * - avoids field.intern() overhead + * @param bytes The bytes of the new term (field is implicitly same as this Term instance) + * @return A new Term + */ + public Term createTerm(BytesRef bytes) + { + return new Term(field,bytes,false); + } + + /** + * Optimized construction of new Terms by reusing same field as this Term + * - avoids field.intern() overhead * @param text The text of the new term (field is implicitly same as this Term instance) * @return A new Term */ @@ -89,10 +116,10 @@ return false; } else if (!field.equals(other.field)) return false; - if (text == null) { - if (other.text != null) + if (bytes == null) { + if (other.bytes != null) return false; - } else if (!text.equals(other.text)) + } else if (!bytes.equals(other.bytes)) return false; return true; } @@ -102,7 +129,7 @@ final int prime = 31; int result = 1; result = prime * result + ((field == null) ? 0 : field.hashCode()); - result = prime * result + ((text == null) ? 0 : text.hashCode()); + result = prime * result + ((bytes == null) ? 0 : bytes.hashCode()); return result; } @@ -113,19 +140,25 @@ The ordering of terms is first by field, then by text.*/ public final int compareTo(Term other) { if (field == other.field) // fields are interned - return text.compareTo(other.text); + return bytes.compareTo(other.bytes); else return field.compareTo(other.field); } /** Resets the field and text of a Term. */ + final void set(String fld, BytesRef bytes) { + field = fld; + this.bytes = bytes; + } + + /** Resets the field and text of a Term. 
*/ final void set(String fld, String txt) { field = fld; - text = txt; + this.bytes = new BytesRef(txt); } @Override - public final String toString() { return field + ":" + text; } + public final String toString() { return field + ":" + bytes.utf8ToString(); } private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException Index: lucene/src/java/org/apache/lucene/index/DocumentsWriter.java =================================================================== --- lucene/src/java/org/apache/lucene/index/DocumentsWriter.java (revision 957599) +++ lucene/src/java/org/apache/lucene/index/DocumentsWriter.java (working copy) @@ -1166,7 +1166,7 @@ num.setNum(docIDUpto); deletesInRAM.numTerms++; - deletesInRAM.addBytesUsed(BYTES_PER_DEL_TERM + term.text.length()*CHAR_NUM_BYTE); + deletesInRAM.addBytesUsed(BYTES_PER_DEL_TERM + term.bytes.length); } // Buffer a specific docID for deletion. Currently only Index: lucene/src/java/org/apache/lucene/util/BytesRef.java =================================================================== --- lucene/src/java/org/apache/lucene/util/BytesRef.java (revision 957599) +++ lucene/src/java/org/apache/lucene/util/BytesRef.java (working copy) @@ -32,6 +32,8 @@ public static final byte[] EMPTY_BYTES = new byte[0]; + public static final BytesRef EMPTY = new BytesRef(0); + /** The contents of the BytesRef. Should never be {@code null}. */ public byte[] bytes;