diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsReader.java index ab89821..c838a55 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsReader.java @@ -30,11 +30,14 @@ import org.apache.lucene.store.AlreadyClosedException; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; +import org.apache.lucene.util.CodecUtil; import org.apache.lucene.util.IOUtils; import java.io.Closeable; import java.util.Set; +import static org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsWriter.*; + /** * Class responsible for access to stored document fields. *
@@ -44,14 +47,13 @@ import java.util.Set; * @lucene.internal */ public final class Lucene40StoredFieldsReader extends StoredFieldsReader implements Cloneable, Closeable { - private final static int FORMAT_SIZE = 4; - private final FieldInfos fieldInfos; private final IndexInput fieldsStream; private final IndexInput indexStream; private int numTotalDocs; private int size; private boolean closed; + private final long headerLength; /** Returns a cloned FieldsReader that shares open * IndexInputs with the original one. It is the caller's @@ -61,16 +63,17 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme @Override public Lucene40StoredFieldsReader clone() { ensureOpen(); - return new Lucene40StoredFieldsReader(fieldInfos, numTotalDocs, size, (IndexInput)fieldsStream.clone(), (IndexInput)indexStream.clone()); + return new Lucene40StoredFieldsReader(fieldInfos, numTotalDocs, size, (IndexInput)fieldsStream.clone(), (IndexInput)indexStream.clone(), headerLength); } // Used only by clone - private Lucene40StoredFieldsReader(FieldInfos fieldInfos, int numTotalDocs, int size, IndexInput fieldsStream, IndexInput indexStream) { + private Lucene40StoredFieldsReader(FieldInfos fieldInfos, int numTotalDocs, int size, IndexInput fieldsStream, IndexInput indexStream, long headerLength) { this.fieldInfos = fieldInfos; this.numTotalDocs = numTotalDocs; this.size = size; this.fieldsStream = fieldsStream; this.indexStream = indexStream; + this.headerLength = headerLength; } public Lucene40StoredFieldsReader(Directory d, SegmentInfo si, FieldInfos fn, IOContext context) throws IOException { @@ -78,17 +81,15 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme boolean success = false; fieldInfos = fn; try { - fieldsStream = d.openInput(IndexFileNames.segmentFileName(segment, "", Lucene40StoredFieldsWriter.FIELDS_EXTENSION), context); - final String indexStreamFN = IndexFileNames.segmentFileName(segment, "", Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION); + fieldsStream = d.openInput(IndexFileNames.segmentFileName(segment, "", FIELDS_EXTENSION), context); + final String indexStreamFN = IndexFileNames.segmentFileName(segment, "", FIELDS_INDEX_EXTENSION); indexStream = d.openInput(indexStreamFN, context); - // its a 4.0 codec: so its not too-old, its corrupt. - // TODO: change this to CodecUtil.checkHeader - if (Lucene40StoredFieldsWriter.FORMAT_CURRENT != indexStream.readInt()) { - throw new CorruptIndexException("unexpected fdx header: " + indexStream); - } - - final long indexSize = indexStream.length() - FORMAT_SIZE; + CodecUtil.checkHeader(indexStream, CODEC_NAME, VERSION_START, VERSION_CURRENT); + CodecUtil.checkHeader(fieldsStream, CODEC_NAME, VERSION_START, VERSION_CURRENT); + headerLength = fieldsStream.getFilePointer(); + assert headerLength == indexStream.getFilePointer(); + final long indexSize = indexStream.length() - headerLength; this.size = (int) (indexSize >> 3); // Verify two sources of "maxDoc" agree: if (this.size != si.docCount) { @@ -135,7 +136,7 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme } private void seekIndex(int docID) throws IOException { - indexStream.seek(FORMAT_SIZE + docID * 8L); + indexStream.seek(headerLength + docID * 8L); } public final void visitDocument(int n, StoredFieldVisitor visitor) throws CorruptIndexException, IOException { @@ -148,7 +149,7 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldNumber); int bits = fieldsStream.readByte() & 0xFF; - assert bits <= (Lucene40StoredFieldsWriter.FIELD_IS_NUMERIC_MASK | Lucene40StoredFieldsWriter.FIELD_IS_BINARY): "bits=" + Integer.toHexString(bits); + assert bits <= (FIELD_IS_NUMERIC_MASK | FIELD_IS_BINARY): "bits=" + Integer.toHexString(bits); switch(visitor.needsField(fieldInfo)) { case YES: @@ -164,19 +165,19 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme } private void readField(StoredFieldVisitor visitor, FieldInfo info, int bits) throws IOException { - final int numeric = bits & Lucene40StoredFieldsWriter.FIELD_IS_NUMERIC_MASK; + final int numeric = bits & FIELD_IS_NUMERIC_MASK; if (numeric != 0) { switch(numeric) { - case Lucene40StoredFieldsWriter.FIELD_IS_NUMERIC_INT: + case FIELD_IS_NUMERIC_INT: visitor.intField(info, fieldsStream.readInt()); return; - case Lucene40StoredFieldsWriter.FIELD_IS_NUMERIC_LONG: + case FIELD_IS_NUMERIC_LONG: visitor.longField(info, fieldsStream.readLong()); return; - case Lucene40StoredFieldsWriter.FIELD_IS_NUMERIC_FLOAT: + case FIELD_IS_NUMERIC_FLOAT: visitor.floatField(info, Float.intBitsToFloat(fieldsStream.readInt())); return; - case Lucene40StoredFieldsWriter.FIELD_IS_NUMERIC_DOUBLE: + case FIELD_IS_NUMERIC_DOUBLE: visitor.doubleField(info, Double.longBitsToDouble(fieldsStream.readLong())); return; default: @@ -186,7 +187,7 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme final int length = fieldsStream.readVInt(); byte bytes[] = new byte[length]; fieldsStream.readBytes(bytes, 0, length); - if ((bits & Lucene40StoredFieldsWriter.FIELD_IS_BINARY) != 0) { + if ((bits & FIELD_IS_BINARY) != 0) { visitor.binaryField(info, bytes, 0, bytes.length); } else { visitor.stringField(info, new String(bytes, 0, bytes.length, IOUtils.CHARSET_UTF_8)); @@ -195,15 +196,15 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme } private void skipField(int bits) throws IOException { - final int numeric = bits & Lucene40StoredFieldsWriter.FIELD_IS_NUMERIC_MASK; + final int numeric = bits & FIELD_IS_NUMERIC_MASK; if (numeric != 0) { switch(numeric) { - case Lucene40StoredFieldsWriter.FIELD_IS_NUMERIC_INT: - case Lucene40StoredFieldsWriter.FIELD_IS_NUMERIC_FLOAT: + case FIELD_IS_NUMERIC_INT: + case FIELD_IS_NUMERIC_FLOAT: fieldsStream.readInt(); return; - case Lucene40StoredFieldsWriter.FIELD_IS_NUMERIC_LONG: - case Lucene40StoredFieldsWriter.FIELD_IS_NUMERIC_DOUBLE: + case FIELD_IS_NUMERIC_LONG: + case FIELD_IS_NUMERIC_DOUBLE: fieldsStream.readLong(); return; default: @@ -242,7 +243,7 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme } public static void files(SegmentInfo info, Set
@@ -54,22 +54,6 @@ import org.apache.lucene.util.IOUtils;
*/
public class Lucene40TermVectorsReader extends TermVectorsReader {
- // NOTE: if you make a new format, it must be larger than
- // the current format
-
- // Changed strings to UTF8 with length-in-bytes not length-in-chars
- static final int FORMAT_UTF8_LENGTH_IN_BYTES = 4;
-
- // NOTE: always change this if you switch to a new format!
- // whenever you add a new format, make it 1 larger (positive version logic)!
- static final int FORMAT_CURRENT = FORMAT_UTF8_LENGTH_IN_BYTES;
-
- // when removing support for old versions, leave the last supported version here
- static final int FORMAT_MINIMUM = FORMAT_UTF8_LENGTH_IN_BYTES;
-
- //The size in bytes that the FORMAT_VERSION will take up at the beginning of each file
- static final int FORMAT_SIZE = 4;
-
static final byte STORE_POSITIONS_WITH_TERMVECTOR = 0x1;
static final byte STORE_OFFSET_WITH_TERMVECTOR = 0x2;
@@ -82,6 +66,12 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
/** Extension of vectors index file */
static final String VECTORS_INDEX_EXTENSION = "tvx";
+
+ static final String CODEC_NAME = "Lucene40TermVectors";
+ static final int VERSION_START = 0;
+ static final int VERSION_CURRENT = VERSION_START;
+
+ private final long headerLength;
private FieldInfos fieldInfos;
@@ -91,17 +81,16 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
private int size;
private int numTotalDocs;
- private final int format;
// used by clone
- Lucene40TermVectorsReader(FieldInfos fieldInfos, IndexInput tvx, IndexInput tvd, IndexInput tvf, int size, int numTotalDocs, int format) {
+ Lucene40TermVectorsReader(FieldInfos fieldInfos, IndexInput tvx, IndexInput tvd, IndexInput tvf, int size, int numTotalDocs, long headerLength) {
this.fieldInfos = fieldInfos;
this.tvx = tvx;
this.tvd = tvd;
this.tvf = tvf;
this.size = size;
this.numTotalDocs = numTotalDocs;
- this.format = format;
+ this.headerLength = headerLength;
}
public Lucene40TermVectorsReader(Directory d, SegmentInfo si, FieldInfos fieldInfos, IOContext context)
@@ -114,18 +103,21 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
try {
String idxName = IndexFileNames.segmentFileName(segment, "", VECTORS_INDEX_EXTENSION);
tvx = d.openInput(idxName, context);
- format = checkValidFormat(tvx);
+ final int tvxVersion = CodecUtil.checkHeader(tvx, CODEC_NAME, VERSION_START, VERSION_CURRENT);
+ headerLength = tvx.getFilePointer();
String fn = IndexFileNames.segmentFileName(segment, "", VECTORS_DOCUMENTS_EXTENSION);
tvd = d.openInput(fn, context);
- final int tvdFormat = checkValidFormat(tvd);
+ final int tvdVersion = CodecUtil.checkHeader(tvd, CODEC_NAME, VERSION_START, VERSION_CURRENT);
fn = IndexFileNames.segmentFileName(segment, "", VECTORS_FIELDS_EXTENSION);
tvf = d.openInput(fn, context);
- final int tvfFormat = checkValidFormat(tvf);
-
- assert format == tvdFormat;
- assert format == tvfFormat;
+ final int tvfVersion = CodecUtil.checkHeader(tvf, CODEC_NAME, VERSION_START, VERSION_CURRENT);
+
+ assert headerLength == tvd.getFilePointer();
+ assert headerLength == tvf.getFilePointer();
+ assert tvxVersion == tvdVersion;
+ assert tvxVersion == tvfVersion;
- numTotalDocs = (int) (tvx.length() >> 4);
+ numTotalDocs = (int) (tvx.length()-headerLength >> 4);
this.size = numTotalDocs;
assert size == 0 || numTotalDocs == size;
@@ -156,13 +148,7 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
// Not private to avoid synthetic access$NNN methods
void seekTvx(final int docNum) throws IOException {
- tvx.seek(docNum * 16L + FORMAT_SIZE);
- }
-
- boolean canReadRawDocs() {
- // we can always read raw docs, unless the term vectors
- // didn't exist
- return format != 0;
+ tvx.seek(docNum * 16L + headerLength);
}
/** Retrieve the length (in bytes) of the tvd and tvf
@@ -210,16 +196,6 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
}
}
- private int checkValidFormat(IndexInput in) throws CorruptIndexException, IOException
- {
- int format = in.readInt();
- if (format < FORMAT_MINIMUM)
- throw new IndexFormatTooOldException(in, format, FORMAT_MINIMUM, FORMAT_CURRENT);
- if (format > FORMAT_CURRENT)
- throw new IndexFormatTooNewException(in, format, FORMAT_MINIMUM, FORMAT_CURRENT);
- return format;
- }
-
public void close() throws IOException {
IOUtils.close(tvx, tvd, tvf);
}
@@ -708,7 +684,7 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
cloneTvf = (IndexInput) tvf.clone();
}
- return new Lucene40TermVectorsReader(fieldInfos, cloneTvx, cloneTvd, cloneTvf, size, numTotalDocs, format);
+ return new Lucene40TermVectorsReader(fieldInfos, cloneTvx, cloneTvd, cloneTvf, size, numTotalDocs, headerLength);
}
public static void files(SegmentInfo info, Set