Index: src/test/org/apache/lucene/index/TestFieldsReader.java =================================================================== --- src/test/org/apache/lucene/index/TestFieldsReader.java (revision 382277) +++ src/test/org/apache/lucene/index/TestFieldsReader.java (working copy) @@ -69,6 +69,38 @@ assertTrue(field.isStorePositionWithTermVector() == false); assertTrue(field.getOmitNorms() == true); + reader.close(); + } + + public void testDocField() throws IOException { + assertTrue(dir != null); + assertTrue(fieldInfos != null); + FieldsReader reader = new FieldsReader(dir, "test", fieldInfos); + assertTrue(reader != null); + assertTrue(reader.size() == 1); + + Field field = reader.docField(0,"textField1"); + assertTrue(field != null); + assertEquals(DocHelper.FIELD_1_TEXT, field.stringValue()); + + field = reader.docField(0,"textField2"); + assertTrue(field != null); + assertTrue(field.isTermVectorStored() == true); + + assertTrue(field.isStoreOffsetWithTermVector() == true); + assertTrue(field.isStorePositionWithTermVector() == true); + assertTrue(field.getOmitNorms() == false); + assertEquals(DocHelper.FIELD_2_TEXT, field.stringValue()); + + field = reader.docField(0, "textField3"); + assertTrue(field != null); + assertTrue(field.isTermVectorStored() == false); + assertTrue(field.isStoreOffsetWithTermVector() == false); + assertTrue(field.isStorePositionWithTermVector() == false); + assertTrue(field.getOmitNorms() == true); + assertEquals(DocHelper.FIELD_3_TEXT, field.stringValue()); + + assertTrue(reader.docField(0,"doesnotexist") == null); reader.close(); } Index: src/java/org/apache/lucene/index/FieldsReader.java =================================================================== --- src/java/org/apache/lucene/index/FieldsReader.java (revision 382277) +++ src/java/org/apache/lucene/index/FieldsReader.java (working copy) @@ -57,6 +57,11 @@ return size; } + /** + * Retrieve a Document that contains all of the fields defined in the index + * @param n the 
+ document number + * @return the Document built from the stored fields of document n + */ final Document doc(int n) throws IOException { indexStream.seek(n * 8L); long position = indexStream.readLong(); @@ -67,9 +72,22 @@ for (int i = 0; i < numFields; i++) { int fieldNumber = fieldsStream.readVInt(); FieldInfo fi = fieldInfos.fieldInfo(fieldNumber); + doc.add(getFieldFromStream(fi)); + } - byte bits = fieldsStream.readByte(); + return doc; + } + /** + * Retrieve a particular field from fieldsStream, which is currently + * looking at the bits byte of a field. When finished, the fieldsStream + * will be looking at the fieldNum of the next stored field. + * @param fi the FieldInfo for the field being examined (based on the + * previous vint in the stream) + * @return the Field. + */ + private Field getFieldFromStream(FieldInfo fi) throws IOException { + byte bits = fieldsStream.readByte(); boolean compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0; boolean tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0; @@ -77,9 +95,9 @@ final byte[] b = new byte[fieldsStream.readVInt()]; fieldsStream.readBytes(b, 0, b.length); if (compressed) - doc.add(new Field(fi.name, uncompress(b), Field.Store.COMPRESS)); + return new Field(fi.name, uncompress(b), Field.Store.COMPRESS); else - doc.add(new Field(fi.name, b, Field.Store.YES)); + return new Field(fi.name, b, Field.Store.YES); } else { Field.Index index; @@ -123,7 +141,7 @@ index, termVector); f.setOmitNorms(fi.omitNorms); - doc.add(f); + return f; } else { Field f = new Field(fi.name, // name @@ -132,13 +150,64 @@ index, termVector); f.setOmitNorms(fi.omitNorms); - doc.add(f); + return f; } } } - return doc; + /** + * Retrieve the first field in the given document with the + * specified field name. If there is more than one field + * in the document with that field name, only the first one + * is returned.
+ * @param n the document to retrieve + * @param fieldName the name of the field to retrieve + * @return the first field in the document with that name, or null + * if the document doesn't have such a field stored. + * @see Document#getField(String) + * @throws IOException + */ + final Field docField(int n, String fieldName) throws IOException { + int fieldNo = fieldInfos.fieldNumber(fieldName); + if (fieldNo < 0) return null; + + // Seek to the start of all the fields + indexStream.seek(n * 8L); + long position = indexStream.readLong(); + fieldsStream.seek(position); + + int numFields = fieldsStream.readVInt(); + + for (int i = 0; i < numFields; i++) { + int fieldNumber = fieldsStream.readVInt(); + if (fieldNumber == fieldNo) { + // This is the field we want + FieldInfo fi = fieldInfos.fieldInfo(fieldNumber); + return getFieldFromStream(fi); + } else { + // This is the field we want to skip + byte bits = fieldsStream.readByte(); // The bits + boolean compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0; + boolean binary = (bits & FieldsWriter.FIELD_IS_BINARY) != 0; + + int dataLength = fieldsStream.readVInt(); // Length to skip; + + if (compressed || binary) { + fieldsStream.seek(fieldsStream.getFilePointer() + dataLength); + } else { + // If not compressed or binary, we store the number of chars, not number of bytes. + for (int j = 0; j < dataLength; j++) { + fieldsStream.readChar(); } + } + continue; + } + + } + // The field wasn't defined on the document, so ignore it. + return null; + } + private final byte[] uncompress(final byte[] input) throws IOException Index: src/java/org/apache/lucene/index/IndexReader.java =================================================================== --- src/java/org/apache/lucene/index/IndexReader.java (revision 382277) +++ src/java/org/apache/lucene/index/IndexReader.java (working copy) @@ -357,6 +357,20 @@ Document in this index. 
*/ public abstract Document document(int n) throws IOException; + /** + * Return the document field for the given document. If querying for + * only one field on a document, implementations may make this more efficient + * than calling document(doc).getField(field). It will only return the + * first value of the field in the document, like {@link Document#getField(String)}. + * + * Only use this function if you know there can be only one value for the field + * (like a document id), this is the only field you want, + * and you want to reduce the overhead of querying. + */ + public Field getDocField(int doc, String field) throws IOException { + return document(doc).getField(field); + } + /** Returns true if document n has been deleted */ public abstract boolean isDeleted(int n); Index: src/java/org/apache/lucene/index/SegmentReader.java =================================================================== --- src/java/org/apache/lucene/index/SegmentReader.java (revision 382277) +++ src/java/org/apache/lucene/index/SegmentReader.java (working copy) @@ -51,6 +51,7 @@ // Compound File Reader when based on a compound file segment CompoundFileReader cfsReader = null; + private class Norm { public Norm(IndexInput in, int number) { @@ -284,6 +285,13 @@ return fieldsReader.doc(n); } + public Field getDocField(int doc, String fieldName) throws IOException { + if (isDeleted(doc)) + throw new IllegalArgumentException + ("attempt to access a deleted document"); + return fieldsReader.docField(doc, fieldName); + } + public synchronized boolean isDeleted(int n) { return (deletedDocs != null && deletedDocs.get(n)); } Index: src/java/org/apache/lucene/store/IndexInput.java =================================================================== --- src/java/org/apache/lucene/store/IndexInput.java (revision 382277) +++ src/java/org/apache/lucene/store/IndexInput.java (working copy) @@ -103,18 +103,26 @@ throws IOException { final int end = start + length; for (int i = start; i < end; i++) {
+ buffer[i] = readChar(); + } + } + + /** Reads a single UTF-8 encoded character + * @return the next character, decoded from its UTF-8 encoded form. + * @see IndexOutput#writeChars(String,int,int) + */ + public final char readChar() throws IOException { byte b = readByte(); if ((b & 0x80) == 0) - buffer[i] = (char)(b & 0x7F); + return (char)(b & 0x7F); else if ((b & 0xE0) != 0xE0) { - buffer[i] = (char)(((b & 0x1F) << 6) + return (char)(((b & 0x1F) << 6) | (readByte() & 0x3F)); } else - buffer[i] = (char)(((b & 0x0F) << 12) + return (char)(((b & 0x0F) << 12) | ((readByte() & 0x3F) << 6) | (readByte() & 0x3F)); } - } /** Closes the stream to futher operations. */ public abstract void close() throws IOException;