Index: src/java/org/apache/lucene/document/Document.java =================================================================== RCS file: /home/cvspublic/jakarta-lucene/src/java/org/apache/lucene/document/Document.java,v retrieving revision 1.19 diff -u -r1.19 Document.java --- src/java/org/apache/lucene/document/Document.java 21 Apr 2004 17:08:04 -0000 1.19 +++ src/java/org/apache/lucene/document/Document.java 3 Jun 2004 16:36:08 -0000 @@ -144,14 +144,16 @@ /** Returns the string value of the field with the given name if any exist in * this document, or null. If multiple fields exist with this name, this - * method returns the first value added. + * method returns the first value added. If only binary fields with this name + * exist, returns null. */ public final String get(String name) { - Field field = getField(name); - if (field != null) - return field.stringValue(); - else - return null; + for (int i = 0; i < fields.size(); i++) { + Field field = (Field)fields.get(i); + if (field.name().equals(name) && (!field.isBinary())) + return field.stringValue(); + } + return null; } /** Returns an Enumeration of all the fields in a document. */ @@ -183,22 +185,65 @@ /** * Returns an array of values of the field specified as the method parameter. - * This method can return null. + * This method can return null. 
* * @param name the name of the field * @return a String[] of field values */ public final String[] getValues(String name) { - Field[] namedFields = getFields(name); - if (namedFields == null) - return null; - String[] values = new String[namedFields.length]; - for (int i = 0; i < namedFields.length; i++) { - values[i] = namedFields[i].stringValue(); + List result = new ArrayList(); + for (int i = 0; i < fields.size(); i++) { + Field field = (Field)fields.get(i); + if (field.name().equals(name) && (!field.isBinary())) + result.add(field.stringValue()); } - return values; + + if (result.size() == 0) + return null; + + return (String[])result.toArray(new String[result.size()]); } + /** + * Returns an array of byte arrays for the fields that have the name specified + * as the method parameter. This method will return null if no + * binary fields with the specified name are available. + * + * @param name the name of the field + * @return a byte[][] of binary field values. + */ + public final byte[][] getBinaryValues(String name) { + List result = new ArrayList(); + for (int i = 0; i < fields.size(); i++) { + Field field = (Field)fields.get(i); + if (field.name().equals(name) && (field.isBinary())) + result.add(field.binaryValue()); + } + + if (result.size() == 0) + return null; + + return (byte[][])result.toArray(new byte[result.size()][]); + } + + /** + * Returns an array of bytes for the first (or only) field that has the name + * specified as the method parameter. This method will return null + * if no binary fields with the specified name are available. + * There may be non-binary fields with the same name. + * + * @param name the name of the field. + * @return a byte[] containing the binary field value. 
+ */ + public final byte[] getBinaryValue(String name) { + for (int i=0; i < fields.size(); i++) { + Field field = (Field)fields.get(i); + if (field.name().equals(name) && (field.isBinary())) + return field.binaryValue(); + } + return null; + } + /** Prints the fields of a document for human consumption. */ public final String toString() { StringBuffer buffer = new StringBuffer(); Index: src/java/org/apache/lucene/document/Field.java =================================================================== RCS file: /home/cvspublic/jakarta-lucene/src/java/org/apache/lucene/document/Field.java,v retrieving revision 1.14 diff -u -r1.14 Field.java --- src/java/org/apache/lucene/document/Field.java 16 Apr 2004 09:48:25 -0000 1.14 +++ src/java/org/apache/lucene/document/Field.java 3 Jun 2004 16:36:08 -0000 @@ -24,20 +24,24 @@ /** A field is a section of a Document. Each field has two parts, a name and a - value. Values may be free text, provided as a String or as a Reader, or they - may be atomic keywords, which are not further processed. Such keywords may - be used to represent dates, urls, etc. Fields are optionally stored in the - index, so that they may be returned with hits on the document. + value. Values may be free text, provided as a String or as a Reader, or they + may be atomic keywords, which are not further processed. Such keywords may + be used to represent dates, urls, etc. Fields may also store binary values + which can be used to store compressed data in the index. Fields are + optionally stored in the index, so that they may be returned with hits + on the document. Binary fields are always stored in the index. 
*/ public final class Field implements java.io.Serializable { private String name = "body"; private String stringValue = null; + private byte[] binaryValue = null; private boolean storeTermVector = false; private Reader readerValue = null; private boolean isStored = false; private boolean isIndexed = true; private boolean isTokenized = true; + private boolean isBinary = false; private float boost = 1.0f; @@ -137,17 +141,29 @@ return f; } + /** Constructs a Binary-valued field that is neither tokenized nor indexed, but is + stored in the index verbatim. Useful for storing compressed data in the + index, for return with hits. */ + public static final Field Binary(String name, byte[] value) { + return new Field(name, value); + } + /** The name of the field (e.g., "date", "subject", "title", or "body") as an interned string. */ public String name() { return name; } - /** The value of the field as a String, or null. If null, the Reader value - is used. Exactly one of stringValue() and readerValue() must be set. */ - public String stringValue() { return stringValue; } - /** The value of the field as a Reader, or null. If null, the String value - is used. Exactly one of stringValue() and readerValue() must be set. */ + /** The value of the field as a String, or null. If null, the Reader or + Binary value is used. Exactly one of stringValue(), readerValue() and + binaryValue() must be set. */ + public String stringValue() { return stringValue; } + /** The value of the field as a Reader, or null. If null, the String or + Binary value is used. Exactly one of stringValue(), readerValue() and + binaryValue() must be set. */ public Reader readerValue() { return readerValue; } - + /** The value of the field in Binary, or null. If null, the Reader or + String value is used. Exactly one of stringValue(), readerValue() and + binaryValue() must be set. 
*/ + public byte[] binaryValue() { return binaryValue; } /** Create a field by specifying all parameters except for storeTermVector, * which is set to false. @@ -193,6 +209,21 @@ this.readerValue = reader; } + Field(String name, byte[] value) { + if (name == null) + throw new IllegalArgumentException("name cannot be null"); + if (value == null) + throw new IllegalArgumentException("value cannot be null"); + + this.name = name.intern(); + this.binaryValue = value; + + this.isBinary = true; + this.isStored = true; + this.isIndexed = false; + this.isTokenized = false; + } + /** True iff the value of the field is to be stored in the index for return with search hits. It is an error for this to be true if a field is Reader-valued. */ @@ -207,6 +238,9 @@ Reader-valued. */ public final boolean isTokenized() { return isTokenized; } + /** True iff the value of the field is stored as binary */ + public final boolean isBinary() { return isBinary; } + /** True iff the term or terms used to index this field are stored as a term * vector, available from {@link IndexReader#getTermFreqVector(int,String)}. 
* These methods do not provide access to the original content of the field, @@ -221,6 +255,8 @@ public final String toString() { if (isStored && isIndexed && !isTokenized) return "Keyword<" + name + ":" + stringValue + ">"; + else if (isBinary) + return "Binary<" + name + ">"; else if (isStored && !isIndexed && !isTokenized) return "Unindexed<" + name + ":" + stringValue + ">"; else if (isStored && isIndexed && isTokenized && stringValue!=null) Index: src/java/org/apache/lucene/index/FieldsReader.java =================================================================== RCS file: /home/cvspublic/jakarta-lucene/src/java/org/apache/lucene/index/FieldsReader.java,v retrieving revision 1.7 diff -u -r1.7 FieldsReader.java --- src/java/org/apache/lucene/index/FieldsReader.java 29 Mar 2004 22:48:02 -0000 1.7 +++ src/java/org/apache/lucene/index/FieldsReader.java 3 Jun 2004 16:36:08 -0000 @@ -67,11 +67,17 @@ byte bits = fieldsStream.readByte(); - doc.add(new Field(fi.name, // name - fieldsStream.readString(), // read value - true, // stored - fi.isIndexed, // indexed - (bits & 1) != 0, fi.storeTermVector)); // vector + if ((bits & 2) != 0) { + final byte[] b = new byte[fieldsStream.readVInt()]; + fieldsStream.readBytes(b, 0, b.length); + doc.add(Field.Binary(fi.name, b)); + } + else + doc.add(new Field(fi.name, // name + fieldsStream.readString(), // read value + true, // stored + fi.isIndexed, // indexed + (bits & 1) != 0, fi.storeTermVector)); // vector } return doc; Index: src/java/org/apache/lucene/index/FieldsWriter.java =================================================================== RCS file: /home/cvspublic/jakarta-lucene/src/java/org/apache/lucene/index/FieldsWriter.java,v retrieving revision 1.3 diff -u -r1.3 FieldsWriter.java --- src/java/org/apache/lucene/index/FieldsWriter.java 29 Mar 2004 22:48:02 -0000 1.3 +++ src/java/org/apache/lucene/index/FieldsWriter.java 3 Jun 2004 16:36:08 -0000 @@ -62,9 +62,19 @@ byte bits = 0; if (field.isTokenized()) bits |= 1; + 
+ if (field.isBinary()) + bits |= 2; + fieldsStream.writeByte(bits); - fieldsStream.writeString(field.stringValue()); + if (field.isBinary()) { + final int len = field.binaryValue().length; + fieldsStream.writeVInt(len); + fieldsStream.writeBytes(field.binaryValue(), len); + } + else + fieldsStream.writeString(field.stringValue()); } } } Index: src/test/org/apache/lucene/document/TestDocument.java =================================================================== RCS file: /home/cvspublic/jakarta-lucene/src/test/org/apache/lucene/document/TestDocument.java,v retrieving revision 1.4 diff -u -r1.4 TestDocument.java --- src/test/org/apache/lucene/document/TestDocument.java 20 Apr 2004 17:26:16 -0000 1.4 +++ src/test/org/apache/lucene/document/TestDocument.java 3 Jun 2004 16:36:08 -0000 @@ -50,6 +50,10 @@ public void testRemoveForNewDocument() throws Exception { Document doc = makeDocumentWithFields(); + assertEquals(12, doc.fields.size()); + doc.removeFields("mixed"); + assertEquals(10, doc.fields.size()); + doc.removeFields("binary"); assertEquals(8, doc.fields.size()); doc.removeFields("keyword"); assertEquals(6, doc.fields.size()); @@ -131,19 +135,35 @@ doc.add(Field.UnIndexed("unindexed", "test2")); doc.add(Field.UnStored( "unstored", "test1")); doc.add(Field.UnStored( "unstored", "test2")); + doc.add(Field.Binary( "binary" , "test1".getBytes())); + doc.add(Field.Binary( "binary" , "test2".getBytes())); + doc.add(Field.UnIndexed("mixed", "test1")); + doc.add(Field.Binary( "mixed", "test2".getBytes())); return doc; } private void doAssert(Document doc, boolean fromIndex) { - String[] keywordFieldValues = doc.getValues("keyword"); - String[] textFieldValues = doc.getValues("text"); - String[] unindexedFieldValues = doc.getValues("unindexed"); - String[] unstoredFieldValues = doc.getValues("unstored"); - + String[] keywordFieldValues = doc.getValues("keyword"); + String[] textFieldValues = doc.getValues("text"); + String[] unindexedFieldValues = 
doc.getValues("unindexed"); + String[] unstoredFieldValues = doc.getValues("unstored"); + byte[][] binaryFieldValues = doc.getBinaryValues("binary"); + byte[] mixedFieldBinaryValue = doc.getBinaryValue("mixed"); + + String[] mixedFieldStringValues = doc.getValues("mixed"); + byte[][] mixedFieldBinaryValues = doc.getBinaryValues("mixed"); + String mixedFieldStringValue = doc.get("mixed"); + assertTrue(keywordFieldValues.length == 2); assertTrue(textFieldValues.length == 2); assertTrue(unindexedFieldValues.length == 2); + assertTrue(binaryFieldValues.length == 2); + assertTrue(mixedFieldBinaryValue.length == "test2".getBytes().length); + assertTrue(mixedFieldStringValues.length == 1); + assertTrue(mixedFieldBinaryValues.length == 1); + assertTrue(mixedFieldStringValue != null); + // this test cannot work for documents retrieved from the index // since unstored fields will obviously not be returned if (! fromIndex) @@ -157,6 +177,13 @@ assertTrue(textFieldValues[1].equals("test2")); assertTrue(unindexedFieldValues[0].equals("test1")); assertTrue(unindexedFieldValues[1].equals("test2")); + assertTrue(new String(binaryFieldValues[0]).equals("test1")); + assertTrue(new String(binaryFieldValues[1]).equals("test2")); + assertTrue(mixedFieldStringValues[0].equals("test1")); + assertTrue(new String(mixedFieldBinaryValues[0]).equals("test2")); + assertTrue(mixedFieldStringValue.equals("test1")); + assertTrue(new String(mixedFieldBinaryValue).equals("test2")); + // this test cannot work for documents retrieved from the index // since unstored fields will obviously not be returned if (! 
fromIndex) Index: src/test/org/apache/lucene/index/DocHelper.java =================================================================== RCS file: /home/cvspublic/jakarta-lucene/src/test/org/apache/lucene/index/DocHelper.java,v retrieving revision 1.1 diff -u -r1.1 DocHelper.java --- src/test/org/apache/lucene/index/DocHelper.java 20 Feb 2004 20:14:55 -0000 1.1 +++ src/test/org/apache/lucene/index/DocHelper.java 3 Jun 2004 16:36:09 -0000 @@ -52,12 +52,29 @@ public static final String UNSTORED_FIELD_2_KEY = "unStoredField2"; public static Field unStoredField2 = Field.UnStored(UNSTORED_FIELD_2_KEY, UNSTORED_2_FIELD_TEXT, true); + public static final String BINARY_1_FIELD_TEXT = "binary field text"; + public static final String BINARY_FIELD_1_KEY = "binaryField1"; + public static Field binaryField1 = Field.Binary(BINARY_FIELD_1_KEY, BINARY_1_FIELD_TEXT.getBytes()); + + public static final String BINARY_2_FIELD_TEXT = "binary field text"; + public static final String BINARY_FIELD_2_KEY = "binaryField2"; + public static Field binaryField2 = Field.Binary(BINARY_FIELD_2_KEY, BINARY_2_FIELD_TEXT.getBytes()); + + public static String BIG_BINARY_FIELD_TEXT; + public static final String BIG_BINARY_FIELD_KEY = "bigBinaryField"; + public static Field bigBinaryField; + // public static Set fieldNamesSet = null; // public static Set fieldValuesSet = null; public static Map nameValues = null; static { + StringBuffer buf = new StringBuffer(); + for (int i=0; i < 100000; i++) + buf.append("No matter where you go, there you are..\n"); + BIG_BINARY_FIELD_TEXT = buf.toString(); + bigBinaryField = Field.Binary(BIG_BINARY_FIELD_KEY, BIG_BINARY_FIELD_TEXT.getBytes()); nameValues = new HashMap(); nameValues.put(TEXT_FIELD_1_KEY, FIELD_1_TEXT); @@ -66,6 +83,9 @@ nameValues.put(UNINDEXED_FIELD_KEY, UNINDEXED_FIELD_TEXT); nameValues.put(UNSTORED_FIELD_1_KEY, UNSTORED_1_FIELD_TEXT); nameValues.put(UNSTORED_FIELD_2_KEY, UNSTORED_2_FIELD_TEXT); + nameValues.put(BINARY_FIELD_1_KEY, 
BINARY_1_FIELD_TEXT); + nameValues.put(BINARY_FIELD_2_KEY, BINARY_2_FIELD_TEXT); + nameValues.put(BIG_BINARY_FIELD_KEY, BIG_BINARY_FIELD_TEXT); } /** @@ -79,7 +99,11 @@ doc.add(unIndField); doc.add(unStoredField1); doc.add(unStoredField2); - } + doc.add(binaryField1); + doc.add(binaryField2); + doc.add(bigBinaryField); + } + /** * Writes the document to the directory using a segment named "test" * @param dir Index: src/test/org/apache/lucene/index/TestDocumentWriter.java =================================================================== RCS file: /home/cvspublic/jakarta-lucene/src/test/org/apache/lucene/index/TestDocumentWriter.java,v retrieving revision 1.2 diff -u -r1.2 TestDocumentWriter.java --- src/test/org/apache/lucene/index/TestDocumentWriter.java 29 Mar 2004 22:48:06 -0000 1.2 +++ src/test/org/apache/lucene/index/TestDocumentWriter.java 3 Jun 2004 16:36:09 -0000 @@ -66,15 +66,27 @@ assertTrue(fields != null && fields.length == 1); assertTrue(fields[0].stringValue().equals(DocHelper.FIELD_2_TEXT)); assertTrue(fields[0].isTermVectorStored() == true); - + fields = doc.getFields("textField1"); assertTrue(fields != null && fields.length == 1); assertTrue(fields[0].stringValue().equals(DocHelper.FIELD_1_TEXT)); assertTrue(fields[0].isTermVectorStored() == false); - + assertTrue(fields[0].binaryValue() == null); + fields = doc.getFields("keyField"); assertTrue(fields != null && fields.length == 1); assertTrue(fields[0].stringValue().equals(DocHelper.KEYWORD_TEXT)); + assertTrue(fields[0].binaryValue() == null); + + fields = doc.getFields("binaryField1"); + assertTrue(fields != null && fields.length == 1); + assertTrue(new String(fields[0].binaryValue()).equals(DocHelper.BINARY_1_FIELD_TEXT)); + assertTrue(fields[0].stringValue() == null); + + fields = doc.getFields("bigBinaryField"); + assertTrue(fields != null && fields.length == 1); + assertTrue(new String(fields[0].binaryValue()).equals(DocHelper.BIG_BINARY_FIELD_TEXT)); + } catch (IOException e) { 
e.printStackTrace(); assertTrue(false); Index: src/test/org/apache/lucene/index/TestFieldInfos.java =================================================================== RCS file: /home/cvspublic/jakarta-lucene/src/test/org/apache/lucene/index/TestFieldInfos.java,v retrieving revision 1.1 diff -u -r1.1 TestFieldInfos.java --- src/test/org/apache/lucene/index/TestFieldInfos.java 20 Feb 2004 20:14:55 -0000 1.1 +++ src/test/org/apache/lucene/index/TestFieldInfos.java 3 Jun 2004 16:36:09 -0000 @@ -34,7 +34,7 @@ FieldInfos fieldInfos = new FieldInfos(); fieldInfos.add(testDoc); //Since the complement is stored as well in the fields map - assertTrue(fieldInfos.size() == 7); //this is 7 b/c we are using the no-arg constructor + assertTrue(fieldInfos.size() == 10); //this is 10 b/c we are using the no-arg constructor RAMDirectory dir = new RAMDirectory(); String name = "testFile"; OutputStream output = dir.createFile(name); Index: src/test/org/apache/lucene/index/TestFieldsReader.java =================================================================== RCS file: /home/cvspublic/jakarta-lucene/src/test/org/apache/lucene/index/TestFieldsReader.java,v retrieving revision 1.2 diff -u -r1.2 TestFieldsReader.java --- src/test/org/apache/lucene/index/TestFieldsReader.java 29 Mar 2004 22:48:06 -0000 1.2 +++ src/test/org/apache/lucene/index/TestFieldsReader.java 3 Jun 2004 16:36:09 -0000 @@ -68,6 +68,9 @@ Field field = doc.getField("textField2"); assertTrue(field != null); assertTrue(field.isTermVectorStored() == true); + field = doc.getField("binaryField1"); + assertTrue(field != null); + assertTrue(field.isBinary() == true); reader.close(); } catch (IOException e) { e.printStackTrace(); Index: src/test/org/apache/lucene/index/TestSegmentReader.java =================================================================== RCS file: /home/cvspublic/jakarta-lucene/src/test/org/apache/lucene/index/TestSegmentReader.java,v retrieving revision 1.2 diff -u -r1.2 TestSegmentReader.java --- 
src/test/org/apache/lucene/index/TestSegmentReader.java 29 Mar 2004 22:48:06 -0000 1.2 +++ src/test/org/apache/lucene/index/TestSegmentReader.java 3 Jun 2004 16:36:09 -0000 @@ -55,7 +55,7 @@ assertTrue(dir != null); assertTrue(reader != null); assertTrue(DocHelper.nameValues.size() > 0); - assertTrue(DocHelper.numFields(testDoc) == 6); + assertTrue(DocHelper.numFields(testDoc) == 9); } public void testDocument() { @@ -107,7 +107,7 @@ try { Collection result = reader.getFieldNames(); assertTrue(result != null); - assertTrue(result.size() == 7); + assertTrue(result.size() == 10); for (Iterator iter = result.iterator(); iter.hasNext();) { String s = (String) iter.next(); //System.out.println("Name: " + s); @@ -124,7 +124,7 @@ result = reader.getFieldNames(false); assertTrue(result != null); - assertTrue(result.size() == 2); + assertTrue(result.size() == 5); //Get all indexed fields that are storing term vectors result = reader.getIndexedFieldNames(true); assertTrue(result != null);