Index: lucene/src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexWriter.java =================================================================== --- lucene/src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexWriter.java (revision 982338) +++ lucene/src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexWriter.java (working copy) @@ -80,7 +80,7 @@ final long termsStart; long packedIndexStart; long packedOffsetsStart; - private int numTerms; + private long numTerms; // TODO: we could conceivably make a PackedInts wrapper // that auto-grows... then we wouldn't force 6 bytes RAM @@ -90,6 +90,8 @@ private long lastTermsPointer; private long totTermLength; + private final BytesRef lastTerm = new BytesRef(); + SimpleFieldWriter(FieldInfo fieldInfo) { this.fieldInfo = fieldInfo; indexStart = out.getFilePointer(); @@ -103,9 +105,21 @@ // First term is first indexed term: if (0 == (numTerms++ % termIndexInterval)) { - // write full bytes - out.writeBytes(text.bytes, text.offset, text.length); + // we can safely strip off the non-distinguishing + // suffix to save RAM in the loaded terms index + final int limit = Math.min(lastTerm.length, text.length); + int minPrefixDiff = 1+lastTerm.length; + for(int byteIdx=0;byteIdx