Index: lucene/CHANGES.txt =================================================================== --- lucene/CHANGES.txt (revision 1228907) +++ lucene/CHANGES.txt (working copy) @@ -714,6 +714,11 @@ contrib/queryparser. If you have used those classes in your code just add the lucene-queryparser.jar file to your classpath. (Uwe Schindler) + +* LUCENE-3681: FST now stores labels for BYTE2 input type as 2 bytes + instead of vInt; this can make FSTs smaller and faster, but it is a + break in the binary format so if you had built and saved any FSTs + then you need to rebuild them. (Robert Muir, Mike McCandless) Security fixes Index: lucene/src/java/org/apache/lucene/util/fst/FST.java =================================================================== --- lucene/src/java/org/apache/lucene/util/fst/FST.java (revision 1228907) +++ lucene/src/java/org/apache/lucene/util/fst/FST.java (working copy) @@ -92,8 +92,11 @@ /** Changed numBytesPerArc for array'd case from byte to int. */ private final static int VERSION_INT_NUM_BYTES_PER_ARC = 1; - private final static int VERSION_CURRENT = VERSION_INT_NUM_BYTES_PER_ARC; + /** Write BYTE2 labels as 2-byte short, not vInt. */ + private final static int VERSION_SHORT_BYTE2_LABELS = 2; + private final static int VERSION_CURRENT = VERSION_SHORT_BYTE2_LABELS; + // Never serialized; just used to represent the virtual // final node w/ no arcs: private final static int FINAL_END_NODE = -1; @@ -199,7 +202,9 @@ public FST(DataInput in, Outputs outputs) throws IOException { this.outputs = outputs; writer = null; - CodecUtil.checkHeader(in, FILE_FORMAT_NAME, VERSION_INT_NUM_BYTES_PER_ARC, VERSION_INT_NUM_BYTES_PER_ARC); + // NOTE: only reads most recent format; we don't have + // back-compat promise for FSTs (they are experimental): + CodecUtil.checkHeader(in, FILE_FORMAT_NAME, VERSION_SHORT_BYTE2_LABELS, VERSION_SHORT_BYTE2_LABELS); if (in.readByte() == 1) { // accepts empty string int numBytes = in.readVInt(); @@ -389,7 +394,7 @@ writer.writeByte((byte) v); } else if (inputType == INPUT_TYPE.BYTE2) { assert v <= 65535: "v=" + v; - writer.writeVInt(v); + writer.writeShort((short) v); } else { //writeInt(v); writer.writeVInt(v); @@ -399,7 +404,11 @@ int readLabel(DataInput in) throws IOException { final int v; if (inputType == INPUT_TYPE.BYTE1) { + // Unsigned byte: v = in.readByte()&0xFF; + } else if (inputType == INPUT_TYPE.BYTE2) { + // Unsigned short: + v = in.readShort()&0xFFFF; } else { v = in.readVInt(); }