Index: src/java/org/apache/lucene/document/Document.java
===================================================================
RCS file: /home/cvspublic/jakarta-lucene/src/java/org/apache/lucene/document/Document.java,v
retrieving revision 1.19
diff -u -r1.19 Document.java
--- src/java/org/apache/lucene/document/Document.java 21 Apr 2004 17:08:04 -0000 1.19
+++ src/java/org/apache/lucene/document/Document.java 3 Jun 2004 16:36:08 -0000
@@ -144,14 +144,16 @@
/** Returns the string value of the field with the given name if any exist in
* this document, or null. If multiple fields exist with this name, this
- * method returns the first value added.
+ * method returns the first value added. If only binary fields with this name
+ * exist, returns null.
*/
public final String get(String name) {
- Field field = getField(name);
- if (field != null)
- return field.stringValue();
- else
- return null;
+ for (int i = 0; i < fields.size(); i++) {
+ Field field = (Field)fields.get(i);
+ if (field.name().equals(name) && (!field.isBinary()))
+ return field.stringValue();
+ }
+ return null;
}
/** Returns an Enumeration of all the fields in a document. */
@@ -183,22 +185,65 @@
/**
* Returns an array of values of the field specified as the method parameter.
- * This method can return null.
+ * This method can return null.
*
* @param name the name of the field
* @return a String[] of field values
*/
public final String[] getValues(String name) {
- Field[] namedFields = getFields(name);
- if (namedFields == null)
- return null;
- String[] values = new String[namedFields.length];
- for (int i = 0; i < namedFields.length; i++) {
- values[i] = namedFields[i].stringValue();
+ List result = new ArrayList();
+ for (int i = 0; i < fields.size(); i++) {
+ Field field = (Field)fields.get(i);
+ if (field.name().equals(name) && (!field.isBinary()))
+ result.add(field.stringValue());
}
- return values;
+
+ if (result.size() == 0)
+ return null;
+
+ return (String[])result.toArray(new String[result.size()]);
}
+ /**
+ * Returns an array of byte arrays, one for each binary field that has the name specified
+ * as the method parameter. This method will return null if no
+ * binary fields with the specified name are available.
+ *
+ * @param name the name of the field
+ * @return a byte[][] of binary field values.
+ */
+ public final byte[][] getBinaryValues(String name) {
+ List result = new ArrayList();
+ for (int i = 0; i < fields.size(); i++) {
+ Field field = (Field)fields.get(i);
+ if (field.name().equals(name) && (field.isBinary()))
+ result.add(field.binaryValue());
+ }
+
+ if (result.size() == 0)
+ return null;
+
+ return (byte[][])result.toArray(new byte[result.size()][]);
+ }
+
+ /**
+ * Returns an array of bytes for the first (or only) field that has the name
+ * specified as the method parameter. This method will return null
+ * if no binary fields with the specified name are available.
+ * There may be non-binary fields with the same name.
+ *
+ * @param name the name of the field.
+ * @return a byte[] containing the binary field value.
+ */
+ public final byte[] getBinaryValue(String name) {
+ for (int i=0; i < fields.size(); i++) {
+ Field field = (Field)fields.get(i);
+ if (field.name().equals(name) && (field.isBinary()))
+ return field.binaryValue();
+ }
+ return null;
+ }
+
/** Prints the fields of a document for human consumption. */
public final String toString() {
StringBuffer buffer = new StringBuffer();
Index: src/java/org/apache/lucene/document/Field.java
===================================================================
RCS file: /home/cvspublic/jakarta-lucene/src/java/org/apache/lucene/document/Field.java,v
retrieving revision 1.14
diff -u -r1.14 Field.java
--- src/java/org/apache/lucene/document/Field.java 16 Apr 2004 09:48:25 -0000 1.14
+++ src/java/org/apache/lucene/document/Field.java 3 Jun 2004 16:36:08 -0000
@@ -24,20 +24,24 @@
/**
A field is a section of a Document. Each field has two parts, a name and a
- value. Values may be free text, provided as a String or as a Reader, or they
- may be atomic keywords, which are not further processed. Such keywords may
- be used to represent dates, urls, etc. Fields are optionally stored in the
- index, so that they may be returned with hits on the document.
+ value. Values may be free text, provided as a String or as a Reader, or they
+ may be atomic keywords, which are not further processed. Such keywords may
+ be used to represent dates, urls, etc. Fields may also store binary values
+ which can be used to store compressed data in the index. Fields are
+ optionally stored in the index, so that they may be returned with hits
+ on the document. Binary fields are always stored in the index.
*/
public final class Field implements java.io.Serializable {
private String name = "body";
private String stringValue = null;
+ private byte[] binaryValue = null;
private boolean storeTermVector = false;
private Reader readerValue = null;
private boolean isStored = false;
private boolean isIndexed = true;
private boolean isTokenized = true;
+ private boolean isBinary = false;
private float boost = 1.0f;
@@ -137,17 +141,29 @@
return f;
}
+ /** Constructs a Binary-valued field that is neither tokenized nor indexed, but is
+ stored in the index verbatim. Useful for storing compressed data in the
+ index, for return with hits. */
+ public static final Field Binary(String name, byte[] value) {
+ return new Field(name, value);
+ }
+
/** The name of the field (e.g., "date", "subject", "title", or "body")
as an interned string. */
public String name() { return name; }
- /** The value of the field as a String, or null. If null, the Reader value
- is used. Exactly one of stringValue() and readerValue() must be set. */
- public String stringValue() { return stringValue; }
- /** The value of the field as a Reader, or null. If null, the String value
- is used. Exactly one of stringValue() and readerValue() must be set. */
+ /** The value of the field as a String, or null. If null, the Reader or
+ Binary value is used. Exactly one of stringValue(), readerValue() and
+ binaryValue() must be set. */
+ public String stringValue() { return stringValue; }
+ /** The value of the field as a Reader, or null. If null, the String or
+ Binary value is used. Exactly one of stringValue(), readerValue() and
+ binaryValue() must be set. */
public Reader readerValue() { return readerValue; }
-
+ /** The value of the field as a byte array, or null. If null, the Reader or
+ String value is used. Exactly one of stringValue(), readerValue() and
+ binaryValue() must be set. */
+ public byte[] binaryValue() { return binaryValue; }
/** Create a field by specifying all parameters except for storeTermVector,
* which is set to false.
@@ -193,6 +209,21 @@
this.readerValue = reader;
}
+ Field(String name, byte[] value) {
+ if (name == null)
+ throw new IllegalArgumentException("name cannot be null");
+ if (value == null)
+ throw new IllegalArgumentException("value cannot be null");
+
+ this.name = name.intern();
+ this.binaryValue = value;
+
+ this.isBinary = true;
+ this.isStored = true;
+ this.isIndexed = false;
+ this.isTokenized = false;
+ }
+
/** True iff the value of the field is to be stored in the index for return
with search hits. It is an error for this to be true if a field is
Reader-valued. */
@@ -207,6 +238,9 @@
Reader-valued. */
public final boolean isTokenized() { return isTokenized; }
+ /** True iff the value of the field is stored as binary. */
+ public final boolean isBinary() { return isBinary; }
+
/** True iff the term or terms used to index this field are stored as a term
* vector, available from {@link IndexReader#getTermFreqVector(int,String)}.
* These methods do not provide access to the original content of the field,
@@ -221,6 +255,8 @@
public final String toString() {
if (isStored && isIndexed && !isTokenized)
return "Keyword<" + name + ":" + stringValue + ">";
+ else if (isBinary)
+ return "Binary<" + name + ">";
else if (isStored && !isIndexed && !isTokenized)
return "Unindexed<" + name + ":" + stringValue + ">";
else if (isStored && isIndexed && isTokenized && stringValue!=null)
Index: src/java/org/apache/lucene/index/FieldsReader.java
===================================================================
RCS file: /home/cvspublic/jakarta-lucene/src/java/org/apache/lucene/index/FieldsReader.java,v
retrieving revision 1.7
diff -u -r1.7 FieldsReader.java
--- src/java/org/apache/lucene/index/FieldsReader.java 29 Mar 2004 22:48:02 -0000 1.7
+++ src/java/org/apache/lucene/index/FieldsReader.java 3 Jun 2004 16:36:08 -0000
@@ -67,11 +67,17 @@
byte bits = fieldsStream.readByte();
- doc.add(new Field(fi.name, // name
- fieldsStream.readString(), // read value
- true, // stored
- fi.isIndexed, // indexed
- (bits & 1) != 0, fi.storeTermVector)); // vector
+ if ((bits & 2) != 0) {
+ final byte[] b = new byte[fieldsStream.readVInt()];
+ fieldsStream.readBytes(b, 0, b.length);
+ doc.add(Field.Binary(fi.name, b));
+ }
+ else
+ doc.add(new Field(fi.name, // name
+ fieldsStream.readString(), // read value
+ true, // stored
+ fi.isIndexed, // indexed
+ (bits & 1) != 0, fi.storeTermVector)); // vector
}
return doc;
Index: src/java/org/apache/lucene/index/FieldsWriter.java
===================================================================
RCS file: /home/cvspublic/jakarta-lucene/src/java/org/apache/lucene/index/FieldsWriter.java,v
retrieving revision 1.3
diff -u -r1.3 FieldsWriter.java
--- src/java/org/apache/lucene/index/FieldsWriter.java 29 Mar 2004 22:48:02 -0000 1.3
+++ src/java/org/apache/lucene/index/FieldsWriter.java 3 Jun 2004 16:36:08 -0000
@@ -62,9 +62,19 @@
byte bits = 0;
if (field.isTokenized())
bits |= 1;
+
+ if (field.isBinary())
+ bits |= 2;
+
fieldsStream.writeByte(bits);
- fieldsStream.writeString(field.stringValue());
+ if (field.isBinary()) {
+ final int len = field.binaryValue().length;
+ fieldsStream.writeVInt(len);
+ fieldsStream.writeBytes(field.binaryValue(), len);
+ }
+ else
+ fieldsStream.writeString(field.stringValue());
}
}
}
Index: src/test/org/apache/lucene/document/TestDocument.java
===================================================================
RCS file: /home/cvspublic/jakarta-lucene/src/test/org/apache/lucene/document/TestDocument.java,v
retrieving revision 1.4
diff -u -r1.4 TestDocument.java
--- src/test/org/apache/lucene/document/TestDocument.java 20 Apr 2004 17:26:16 -0000 1.4
+++ src/test/org/apache/lucene/document/TestDocument.java 3 Jun 2004 16:36:08 -0000
@@ -50,6 +50,10 @@
public void testRemoveForNewDocument() throws Exception
{
Document doc = makeDocumentWithFields();
+ assertEquals(12, doc.fields.size());
+ doc.removeFields("mixed");
+ assertEquals(10, doc.fields.size());
+ doc.removeFields("binary");
assertEquals(8, doc.fields.size());
doc.removeFields("keyword");
assertEquals(6, doc.fields.size());
@@ -131,19 +135,35 @@
doc.add(Field.UnIndexed("unindexed", "test2"));
doc.add(Field.UnStored( "unstored", "test1"));
doc.add(Field.UnStored( "unstored", "test2"));
+ doc.add(Field.Binary( "binary" , "test1".getBytes()));
+ doc.add(Field.Binary( "binary" , "test2".getBytes()));
+ doc.add(Field.UnIndexed("mixed", "test1"));
+ doc.add(Field.Binary( "mixed", "test2".getBytes()));
return doc;
}
private void doAssert(Document doc, boolean fromIndex)
{
- String[] keywordFieldValues = doc.getValues("keyword");
- String[] textFieldValues = doc.getValues("text");
- String[] unindexedFieldValues = doc.getValues("unindexed");
- String[] unstoredFieldValues = doc.getValues("unstored");
-
+ String[] keywordFieldValues = doc.getValues("keyword");
+ String[] textFieldValues = doc.getValues("text");
+ String[] unindexedFieldValues = doc.getValues("unindexed");
+ String[] unstoredFieldValues = doc.getValues("unstored");
+ byte[][] binaryFieldValues = doc.getBinaryValues("binary");
+ byte[] mixedFieldBinaryValue = doc.getBinaryValue("mixed");
+
+ String[] mixedFieldStringValues = doc.getValues("mixed");
+ byte[][] mixedFieldBinaryValues = doc.getBinaryValues("mixed");
+ String mixedFieldStringValue = doc.get("mixed");
+
assertTrue(keywordFieldValues.length == 2);
assertTrue(textFieldValues.length == 2);
assertTrue(unindexedFieldValues.length == 2);
+ assertTrue(binaryFieldValues.length == 2);
+ assertTrue(mixedFieldBinaryValue.length == "test2".getBytes().length);
+ assertTrue(mixedFieldStringValues.length == 1);
+ assertTrue(mixedFieldBinaryValues.length == 1);
+ assertTrue(mixedFieldStringValue != null);
+
// this test cannot work for documents retrieved from the index
// since unstored fields will obviously not be returned
if (! fromIndex)
@@ -157,6 +177,13 @@
assertTrue(textFieldValues[1].equals("test2"));
assertTrue(unindexedFieldValues[0].equals("test1"));
assertTrue(unindexedFieldValues[1].equals("test2"));
+ assertTrue(new String(binaryFieldValues[0]).equals("test1"));
+ assertTrue(new String(binaryFieldValues[1]).equals("test2"));
+ assertTrue(mixedFieldStringValues[0].equals("test1"));
+ assertTrue(new String(mixedFieldBinaryValues[0]).equals("test2"));
+ assertTrue(mixedFieldStringValue.equals("test1"));
+ assertTrue(new String(mixedFieldBinaryValue).equals("test2"));
+
// this test cannot work for documents retrieved from the index
// since unstored fields will obviously not be returned
if (! fromIndex)
Index: src/test/org/apache/lucene/index/DocHelper.java
===================================================================
RCS file: /home/cvspublic/jakarta-lucene/src/test/org/apache/lucene/index/DocHelper.java,v
retrieving revision 1.1
diff -u -r1.1 DocHelper.java
--- src/test/org/apache/lucene/index/DocHelper.java 20 Feb 2004 20:14:55 -0000 1.1
+++ src/test/org/apache/lucene/index/DocHelper.java 3 Jun 2004 16:36:09 -0000
@@ -52,12 +52,29 @@
public static final String UNSTORED_FIELD_2_KEY = "unStoredField2";
public static Field unStoredField2 = Field.UnStored(UNSTORED_FIELD_2_KEY, UNSTORED_2_FIELD_TEXT, true);
+ public static final String BINARY_1_FIELD_TEXT = "binary field text";
+ public static final String BINARY_FIELD_1_KEY = "binaryField1";
+ public static Field binaryField1 = Field.Binary(BINARY_FIELD_1_KEY, BINARY_1_FIELD_TEXT.getBytes());
+
+ public static final String BINARY_2_FIELD_TEXT = "binary field text";
+ public static final String BINARY_FIELD_2_KEY = "binaryField2";
+ public static Field binaryField2 = Field.Binary(BINARY_FIELD_2_KEY, BINARY_2_FIELD_TEXT.getBytes());
+
+ public static String BIG_BINARY_FIELD_TEXT;
+ public static final String BIG_BINARY_FIELD_KEY = "bigBinaryField";
+ public static Field bigBinaryField;
+
// public static Set fieldNamesSet = null;
// public static Set fieldValuesSet = null;
public static Map nameValues = null;
static
{
+ StringBuffer buf = new StringBuffer();
+ for (int i=0; i < 100000; i++)
+ buf.append("No matter where you go, there you are..\n");
+ BIG_BINARY_FIELD_TEXT = buf.toString();
+ bigBinaryField = Field.Binary(BIG_BINARY_FIELD_KEY, BIG_BINARY_FIELD_TEXT.getBytes());
nameValues = new HashMap();
nameValues.put(TEXT_FIELD_1_KEY, FIELD_1_TEXT);
@@ -66,6 +83,9 @@
nameValues.put(UNINDEXED_FIELD_KEY, UNINDEXED_FIELD_TEXT);
nameValues.put(UNSTORED_FIELD_1_KEY, UNSTORED_1_FIELD_TEXT);
nameValues.put(UNSTORED_FIELD_2_KEY, UNSTORED_2_FIELD_TEXT);
+ nameValues.put(BINARY_FIELD_1_KEY, BINARY_1_FIELD_TEXT);
+ nameValues.put(BINARY_FIELD_2_KEY, BINARY_2_FIELD_TEXT);
+ nameValues.put(BIG_BINARY_FIELD_KEY, BIG_BINARY_FIELD_TEXT);
}
/**
@@ -79,7 +99,11 @@
doc.add(unIndField);
doc.add(unStoredField1);
doc.add(unStoredField2);
- }
+ doc.add(binaryField1);
+ doc.add(binaryField2);
+ doc.add(bigBinaryField);
+ }
+
/**
* Writes the document to the directory using a segment named "test"
* @param dir
Index: src/test/org/apache/lucene/index/TestDocumentWriter.java
===================================================================
RCS file: /home/cvspublic/jakarta-lucene/src/test/org/apache/lucene/index/TestDocumentWriter.java,v
retrieving revision 1.2
diff -u -r1.2 TestDocumentWriter.java
--- src/test/org/apache/lucene/index/TestDocumentWriter.java 29 Mar 2004 22:48:06 -0000 1.2
+++ src/test/org/apache/lucene/index/TestDocumentWriter.java 3 Jun 2004 16:36:09 -0000
@@ -66,15 +66,27 @@
assertTrue(fields != null && fields.length == 1);
assertTrue(fields[0].stringValue().equals(DocHelper.FIELD_2_TEXT));
assertTrue(fields[0].isTermVectorStored() == true);
-
+
fields = doc.getFields("textField1");
assertTrue(fields != null && fields.length == 1);
assertTrue(fields[0].stringValue().equals(DocHelper.FIELD_1_TEXT));
assertTrue(fields[0].isTermVectorStored() == false);
-
+ assertTrue(fields[0].binaryValue() == null);
+
fields = doc.getFields("keyField");
assertTrue(fields != null && fields.length == 1);
assertTrue(fields[0].stringValue().equals(DocHelper.KEYWORD_TEXT));
+ assertTrue(fields[0].binaryValue() == null);
+
+ fields = doc.getFields("binaryField1");
+ assertTrue(fields != null && fields.length == 1);
+ assertTrue(new String(fields[0].binaryValue()).equals(DocHelper.BINARY_1_FIELD_TEXT));
+ assertTrue(fields[0].stringValue() == null);
+
+ fields = doc.getFields("bigBinaryField");
+ assertTrue(fields != null && fields.length == 1);
+ assertTrue(new String(fields[0].binaryValue()).equals(DocHelper.BIG_BINARY_FIELD_TEXT));
+
} catch (IOException e) {
e.printStackTrace();
assertTrue(false);
Index: src/test/org/apache/lucene/index/TestFieldInfos.java
===================================================================
RCS file: /home/cvspublic/jakarta-lucene/src/test/org/apache/lucene/index/TestFieldInfos.java,v
retrieving revision 1.1
diff -u -r1.1 TestFieldInfos.java
--- src/test/org/apache/lucene/index/TestFieldInfos.java 20 Feb 2004 20:14:55 -0000 1.1
+++ src/test/org/apache/lucene/index/TestFieldInfos.java 3 Jun 2004 16:36:09 -0000
@@ -34,7 +34,7 @@
FieldInfos fieldInfos = new FieldInfos();
fieldInfos.add(testDoc);
//Since the complement is stored as well in the fields map
- assertTrue(fieldInfos.size() == 7); //this is 7 b/c we are using the no-arg constructor
+ assertTrue(fieldInfos.size() == 10); //this is 10 b/c we are using the no-arg constructor
RAMDirectory dir = new RAMDirectory();
String name = "testFile";
OutputStream output = dir.createFile(name);
Index: src/test/org/apache/lucene/index/TestFieldsReader.java
===================================================================
RCS file: /home/cvspublic/jakarta-lucene/src/test/org/apache/lucene/index/TestFieldsReader.java,v
retrieving revision 1.2
diff -u -r1.2 TestFieldsReader.java
--- src/test/org/apache/lucene/index/TestFieldsReader.java 29 Mar 2004 22:48:06 -0000 1.2
+++ src/test/org/apache/lucene/index/TestFieldsReader.java 3 Jun 2004 16:36:09 -0000
@@ -68,6 +68,9 @@
Field field = doc.getField("textField2");
assertTrue(field != null);
assertTrue(field.isTermVectorStored() == true);
+ field = doc.getField("binaryField1");
+ assertTrue(field != null);
+ assertTrue(field.isBinary() == true);
reader.close();
} catch (IOException e) {
e.printStackTrace();
Index: src/test/org/apache/lucene/index/TestSegmentReader.java
===================================================================
RCS file: /home/cvspublic/jakarta-lucene/src/test/org/apache/lucene/index/TestSegmentReader.java,v
retrieving revision 1.2
diff -u -r1.2 TestSegmentReader.java
--- src/test/org/apache/lucene/index/TestSegmentReader.java 29 Mar 2004 22:48:06 -0000 1.2
+++ src/test/org/apache/lucene/index/TestSegmentReader.java 3 Jun 2004 16:36:09 -0000
@@ -55,7 +55,7 @@
assertTrue(dir != null);
assertTrue(reader != null);
assertTrue(DocHelper.nameValues.size() > 0);
- assertTrue(DocHelper.numFields(testDoc) == 6);
+ assertTrue(DocHelper.numFields(testDoc) == 9);
}
public void testDocument() {
@@ -107,7 +107,7 @@
try {
Collection result = reader.getFieldNames();
assertTrue(result != null);
- assertTrue(result.size() == 7);
+ assertTrue(result.size() == 10);
for (Iterator iter = result.iterator(); iter.hasNext();) {
String s = (String) iter.next();
//System.out.println("Name: " + s);
@@ -124,7 +124,7 @@
result = reader.getFieldNames(false);
assertTrue(result != null);
- assertTrue(result.size() == 2);
+ assertTrue(result.size() == 5);
//Get all indexed fields that are storing term vectors
result = reader.getIndexedFieldNames(true);
assertTrue(result != null);